From 7d4c1ea2be825bc65e0de0fb34f3531aaf03e673 Mon Sep 17 00:00:00 2001
From: "Alexander A. Klimov" <grandmaster@al2klimov.de>
Date: Wed, 8 Jul 2020 13:35:46 +0200
Subject: [PATCH 001/262] EDAC: Replace HTTP links with HTTPS ones

Rationale:
Reduces attack surface on kernel devs opening the links for MITM
as HTTPS traffic is much harder to manipulate.

  Deterministic algorithm:
  For each file:
    If not .svg:
      For each line:
        If doesn't contain `\bxmlns\b`:
          For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`:
  	  If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`:
              If both the HTTP and HTTPS versions
              return 200 OK and serve the same content:
                Replace HTTP with HTTPS.

 [ bp: Merge all EDAC patches into a single one. ]

Signed-off-by: Alexander A. Klimov <grandmaster@al2klimov.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Tero Kristo <t-kristo@ti.com> # ti_edac
Link: https://lkml.kernel.org/r/20200708113546.14135-1-grandmaster@al2klimov.de
---
 drivers/edac/e752x_edac.c   | 2 +-
 drivers/edac/ghes_edac.c    | 2 +-
 drivers/edac/i5400_edac.c   | 4 ++--
 drivers/edac/i7300_edac.c   | 4 ++--
 drivers/edac/i7core_edac.c  | 4 ++--
 drivers/edac/ie31200_edac.c | 6 +++---
 drivers/edac/sb_edac.c      | 2 +-
 drivers/edac/ti_edac.c      | 2 +-
 8 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index de732dc2ef33e8..313d08018166b2 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -7,7 +7,7 @@
  * Implement support for the e7520, E7525, e7320 and i3100 memory controllers.
  *
  * Datasheets:
- *	http://www.intel.in/content/www/in/en/chipsets/e7525-memory-controller-hub-datasheet.html
+ *	https://www.intel.in/content/www/in/en/chipsets/e7525-memory-controller-hub-datasheet.html
  *	ftp://download.intel.com/design/intarch/datashts/31345803.pdf
  *
  * Written by Tom Zimmerman
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index da60c29468a7cf..2c938373e83281 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -4,7 +4,7 @@
  *
  * Copyright (c) 2013 by Mauro Carvalho Chehab
  *
- * Red Hat Inc. http://www.redhat.com
+ * Red Hat Inc. https://www.redhat.com
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index f131c05ade9fa2..92d63eb533aee7 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -8,7 +8,7 @@
  *	 Ben Woodard <woodard@redhat.com>
  *	 Mauro Carvalho Chehab
  *
- * Red Hat Inc. http://www.redhat.com
+ * Red Hat Inc. https://www.redhat.com
  *
  * Forked and adapted from the i5000_edac driver which was
  * written by Douglas Thompson Linux Networx <norsk5@xmission.com>
@@ -1460,7 +1460,7 @@ module_exit(i5400_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Ben Woodard <woodard@redhat.com>");
 MODULE_AUTHOR("Mauro Carvalho Chehab");
-MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
+MODULE_AUTHOR("Red Hat Inc. (https://www.redhat.com)");
 MODULE_DESCRIPTION("MC Driver for Intel I5400 memory controllers - "
 		   I5400_REVISION);
 
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index 2e9bbe56cde9eb..4f28b8c8d3789f 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -5,7 +5,7 @@
  * Copyright (c) 2010 by:
  *	 Mauro Carvalho Chehab
  *
- * Red Hat Inc. http://www.redhat.com
+ * Red Hat Inc. https://www.redhat.com
  *
  * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet
  *	http://www.intel.com/Assets/PDF/datasheet/318082.pdf
@@ -1206,7 +1206,7 @@ module_exit(i7300_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Mauro Carvalho Chehab");
-MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
+MODULE_AUTHOR("Red Hat Inc. (https://www.redhat.com)");
 MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - "
 		   I7300_REVISION);
 
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 5860ca41185cf1..9146d1cde600b1 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -9,7 +9,7 @@
  * Copyright (c) 2009-2010 by:
  *	 Mauro Carvalho Chehab
  *
- * Red Hat Inc. http://www.redhat.com
+ * Red Hat Inc. https://www.redhat.com
  *
  * Forked and adapted from the i5400_edac driver
  *
@@ -2391,7 +2391,7 @@ module_exit(i7core_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Mauro Carvalho Chehab");
-MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
+MODULE_AUTHOR("Red Hat Inc. (https://www.redhat.com)");
 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
 		   I7CORE_REVISION);
 
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index ebe50996cc423e..c47963240b6596 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -9,7 +9,7 @@
  * Since the DRAM controller is on the cpu chip, we can use its PCI device
  * id to identify these processors.
  *
- * PCI DRAM controller device ids (Taken from The PCI ID Repository - http://pci-ids.ucw.cz/)
+ * PCI DRAM controller device ids (Taken from The PCI ID Repository - https://pci-ids.ucw.cz/)
  *
  * 0108: Xeon E3-1200 Processor Family DRAM Controller
  * 010c: Xeon E3-1200/2nd Generation Core Processor Family DRAM Controller
@@ -23,9 +23,9 @@
  * 3e..: 8th/9th Gen Core Processor Host Bridge/DRAM Registers
  *
  * Based on Intel specification:
- * http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
+ * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
  * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html
- * http://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html
+ * https://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html
  * https://www.intel.com/content/www/us/en/products/docs/processors/core/8th-gen-core-family-datasheet-vol-2.html
  *
  * According to the above datasheet (p.16):
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index d414698ca32421..a6704e73fcce5c 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -3552,6 +3552,6 @@ MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Mauro Carvalho Chehab");
-MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
+MODULE_AUTHOR("Red Hat Inc. (https://www.redhat.com)");
 MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - "
 		   SBRIDGE_REVISION);
diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c
index 8be3e89a510e42..6e52796a0b4147 100644
--- a/drivers/edac/ti_edac.c
+++ b/drivers/edac/ti_edac.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (C) 2017 Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (C) 2017 Texas Instruments Incorporated - https://www.ti.com/
  *
  * Texas Instruments DDR3 ECC error correction and detection driver
  *

From dc7a8476cffcb98b008ca44fdadf235a3ad7e7e2 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Wed, 8 Jul 2020 15:35:15 +0000
Subject: [PATCH 002/262] EDAC/mce_amd: Add new error descriptions for existing
 types

A few existing MCA bank types will have new error types in future SMCA
systems.

Add the descriptions for the new error types.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200708153515.1911642-1-Yazen.Ghannam@amd.com
---
 drivers/edac/mce_amd.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 325aedf46ff2b8..4fd06a3dc6fe0c 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -210,6 +210,11 @@ static const char * const smca_if_mce_desc[] = {
 	"L2 BTB Multi-Match Error",
 	"L2 Cache Response Poison Error",
 	"System Read Data Error",
+	"Hardware Assertion Error",
+	"L1-TLB Multi-Hit",
+	"L2-TLB Multi-Hit",
+	"BSR Parity Error",
+	"CT MCE",
 };
 
 static const char * const smca_l2_mce_desc[] = {
@@ -228,7 +233,8 @@ static const char * const smca_de_mce_desc[] = {
 	"Fetch address FIFO parity error",
 	"Patch RAM data parity error",
 	"Patch RAM sequencer parity error",
-	"Micro-op buffer parity error"
+	"Micro-op buffer parity error",
+	"Hardware Assertion MCA Error",
 };
 
 static const char * const smca_ex_mce_desc[] = {
@@ -244,6 +250,8 @@ static const char * const smca_ex_mce_desc[] = {
 	"Scheduling queue parity error",
 	"Branch buffer queue parity error",
 	"Hardware Assertion error",
+	"Spec Map parity error",
+	"Retire Map parity error",
 };
 
 static const char * const smca_fp_mce_desc[] = {
@@ -360,6 +368,7 @@ static const char * const smca_smu2_mce_desc[] = {
 	"Instruction Tag Cache Bank A ECC or parity error",
 	"Instruction Tag Cache Bank B ECC or parity error",
 	"System Hub Read Buffer ECC or parity error",
+	"PHY RAM ECC error",
 };
 
 static const char * const smca_mp5_mce_desc[] = {

From eb3411c95d6dd144c9c977e73d951ccfade91da7 Mon Sep 17 00:00:00 2001
From: Talel Shenhar <talel@amazon.com>
Date: Sun, 16 Aug 2020 21:55:50 +0300
Subject: [PATCH 003/262] dt-bindings: EDAC: Add Amazon's Annapurna Labs Memory
 Controller binding

Document Amazon's Annapurna Labs Memory Controller EDAC SoC binding.

Signed-off-by: Talel Shenhar <talel@amazon.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lkml.kernel.org/r/20200816185551.19108-2-talel@amazon.com
---
 .../bindings/edac/amazon,al-mc-edac.yaml      | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/edac/amazon,al-mc-edac.yaml

diff --git a/Documentation/devicetree/bindings/edac/amazon,al-mc-edac.yaml b/Documentation/devicetree/bindings/edac/amazon,al-mc-edac.yaml
new file mode 100644
index 00000000000000..a25387df0865aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/edac/amazon,al-mc-edac.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/edac/amazon,al-mc-edac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amazon's Annapurna Labs Memory Controller EDAC
+
+maintainers:
+  - Talel Shenhar <talel@amazon.com>
+  - Talel Shenhar <talelshenhar@gmail.com>
+
+description: |
+  EDAC node is defined to describe on-chip error detection and correction for
+  Amazon's Annapurna Labs Memory Controller.
+
+properties:
+
+  compatible:
+    const: amazon,al-mc-edac
+
+  reg:
+    maxItems: 1
+
+  "#address-cells":
+    const: 2
+
+  "#size-cells":
+    const: 2
+
+  interrupts:
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: uncorrectable error interrupt
+      - description: correctable error interrupt
+
+  interrupt-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      - const: ue
+      - const: ce
+
+required:
+  - compatible
+  - reg
+  - "#address-cells"
+  - "#size-cells"
+
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+        edac@f0080000 {
+          #address-cells = <2>;
+          #size-cells = <2>;
+          compatible = "amazon,al-mc-edac";
+          reg = <0x0 0xf0080000 0x0 0x00010000>;
+          interrupt-parent = <&amazon_al_system_fabric>;
+          interrupt-names = "ue";
+          interrupts = <20 IRQ_TYPE_LEVEL_HIGH>;
+        };
+    };

From e23a7cdeb3da8d3ca943fced1420020c1d524684 Mon Sep 17 00:00:00 2001
From: Talel Shenhar <talel@amazon.com>
Date: Sun, 16 Aug 2020 21:55:51 +0300
Subject: [PATCH 004/262] EDAC/al-mc-edac: Add Amazon's Annapurna Labs Memory
 Controller driver

The Amazon's Annapurna Labs Memory Controller EDAC supports ECC capability
for error detection and correction (Single bit error correction, Double
detection). This driver introduces EDAC driver for that capability.

 [ bp: Remove "EDAC" string from Kconfig tristate as it is redundant. ]

Signed-off-by: Talel Shenhar <talel@amazon.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: James Morse <james.morse@arm.com>
Link: https://lkml.kernel.org/r/20200816185551.19108-3-talel@amazon.com
---
 MAINTAINERS               |   7 +
 drivers/edac/Kconfig      |   7 +
 drivers/edac/Makefile     |   1 +
 drivers/edac/al_mc_edac.c | 354 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 369 insertions(+)
 create mode 100644 drivers/edac/al_mc_edac.c

diff --git a/MAINTAINERS b/MAINTAINERS
index deaafb617361c7..43df0585c4b8c7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -802,6 +802,13 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/interrupt-controller/amazon,al-fic.txt
 F:	drivers/irqchip/irq-al-fic.c
 
+AMAZON ANNAPURNA LABS MEMORY CONTROLLER EDAC
+M:	Talel Shenhar <talel@amazon.com>
+M:	Talel Shenhar <talelshenhar@gmail.com>
+S:	Maintained
+F:	Documentation/devicetree/bindings/edac/amazon,al-mc-edac.yaml
+F:	drivers/edac/al_mc_edac.c
+
 AMAZON ANNAPURNA LABS THERMAL MMIO DRIVER
 M:	Talel Shenhar <talel@amazon.com>
 S:	Maintained
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 7b6ec3014ba2dc..7a47680d6f0786 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -100,6 +100,13 @@ config EDAC_AMD64_ERROR_INJECTION
 	  In addition, there are two control files, inject_read and inject_write,
 	  which trigger the DRAM ECC Read and Write respectively.
 
+config EDAC_AL_MC
+	tristate "Amazon's Annapurna Lab Memory Controller"
+	depends on (ARCH_ALPINE || COMPILE_TEST)
+	help
+	  Support for error detection and correction for Amazon's Annapurna
+	  Labs Alpine chips which allow 1 bit correction and 2 bits detection.
+
 config EDAC_AMD76X
 	tristate "AMD 76x (760, 762, 768)"
 	depends on PCI && X86_32
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 269e15118ceac3..3a849168780dd1 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_EDAC_GHES)			+= ghes_edac.o
 edac_mce_amd-y				:= mce_amd.o
 obj-$(CONFIG_EDAC_DECODE_MCE)		+= edac_mce_amd.o
 
+obj-$(CONFIG_EDAC_AL_MC)		+= al_mc_edac.o
 obj-$(CONFIG_EDAC_AMD76X)		+= amd76x_edac.o
 obj-$(CONFIG_EDAC_CPC925)		+= cpc925_edac.o
 obj-$(CONFIG_EDAC_I5000)		+= i5000_edac.o
diff --git a/drivers/edac/al_mc_edac.c b/drivers/edac/al_mc_edac.c
new file mode 100644
index 00000000000000..7d4f396c27b554
--- /dev/null
+++ b/drivers/edac/al_mc_edac.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ */
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/edac.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include "edac_module.h"
+
+/* Registers Offset */
+#define AL_MC_ECC_CFG		0x70
+#define AL_MC_ECC_CLEAR		0x7c
+#define AL_MC_ECC_ERR_COUNT	0x80
+#define AL_MC_ECC_CE_ADDR0	0x84
+#define AL_MC_ECC_CE_ADDR1	0x88
+#define AL_MC_ECC_UE_ADDR0	0xa4
+#define AL_MC_ECC_UE_ADDR1	0xa8
+#define AL_MC_ECC_CE_SYND0	0x8c
+#define AL_MC_ECC_CE_SYND1	0x90
+#define AL_MC_ECC_CE_SYND2	0x94
+#define AL_MC_ECC_UE_SYND0	0xac
+#define AL_MC_ECC_UE_SYND1	0xb0
+#define AL_MC_ECC_UE_SYND2	0xb4
+
+/* Registers Fields */
+#define AL_MC_ECC_CFG_SCRUB_DISABLED	BIT(4)
+
+#define AL_MC_ECC_CLEAR_UE_COUNT	BIT(3)
+#define AL_MC_ECC_CLEAR_CE_COUNT	BIT(2)
+#define AL_MC_ECC_CLEAR_UE_ERR		BIT(1)
+#define AL_MC_ECC_CLEAR_CE_ERR		BIT(0)
+
+#define AL_MC_ECC_ERR_COUNT_UE		GENMASK(31, 16)
+#define AL_MC_ECC_ERR_COUNT_CE		GENMASK(15, 0)
+
+#define AL_MC_ECC_CE_ADDR0_RANK		GENMASK(25, 24)
+#define AL_MC_ECC_CE_ADDR0_ROW		GENMASK(17, 0)
+
+#define AL_MC_ECC_CE_ADDR1_BG		GENMASK(25, 24)
+#define AL_MC_ECC_CE_ADDR1_BANK		GENMASK(18, 16)
+#define AL_MC_ECC_CE_ADDR1_COLUMN	GENMASK(11, 0)
+
+#define AL_MC_ECC_UE_ADDR0_RANK		GENMASK(25, 24)
+#define AL_MC_ECC_UE_ADDR0_ROW		GENMASK(17, 0)
+
+#define AL_MC_ECC_UE_ADDR1_BG		GENMASK(25, 24)
+#define AL_MC_ECC_UE_ADDR1_BANK		GENMASK(18, 16)
+#define AL_MC_ECC_UE_ADDR1_COLUMN	GENMASK(11, 0)
+
+#define DRV_NAME "al_mc_edac"
+#define AL_MC_EDAC_MSG_MAX 256
+
+struct al_mc_edac {
+	void __iomem *mmio_base;
+	spinlock_t lock;
+	int irq_ce;
+	int irq_ue;
+};
+
+static void prepare_msg(char *message, size_t buffer_size,
+			enum hw_event_mc_err_type type,
+			u8 rank, u32 row, u8 bg, u8 bank, u16 column,
+			u32 syn0, u32 syn1, u32 syn2)
+{
+	snprintf(message, buffer_size,
+		 "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x",
+		 type == HW_EVENT_ERR_UNCORRECTED ? "UE" : "CE",
+		 rank, row, bg, bank, column, syn0, syn1, syn2);
+}
+
+static int handle_ce(struct mem_ctl_info *mci)
+{
+	u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row;
+	struct al_mc_edac *al_mc = mci->pvt_info;
+	char msg[AL_MC_EDAC_MSG_MAX];
+	u16 ce_count, column;
+	unsigned long flags;
+	u8 rank, bg, bank;
+
+	eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
+	ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt);
+	if (!ce_count)
+		return 0;
+
+	ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0);
+	ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1);
+	ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0);
+	ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1);
+	ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2);
+
+	writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR,
+		       al_mc->mmio_base + AL_MC_ECC_CLEAR);
+
+	dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
+		ecccaddr0, ecccaddr1);
+
+	rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0);
+	row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0);
+
+	bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1);
+	bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1);
+	column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1);
+
+	prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED,
+		    rank, row, bg, bank, column,
+		    ecccsyn0, ecccsyn1, ecccsyn2);
+
+	spin_lock_irqsave(&al_mc->lock, flags);
+	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
+			     ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
+	spin_unlock_irqrestore(&al_mc->lock, flags);
+
+	return ce_count;
+}
+
+static int handle_ue(struct mem_ctl_info *mci)
+{
+	u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row;
+	struct al_mc_edac *al_mc = mci->pvt_info;
+	char msg[AL_MC_EDAC_MSG_MAX];
+	u16 ue_count, column;
+	unsigned long flags;
+	u8 rank, bg, bank;
+
+	eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT);
+	ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt);
+	if (!ue_count)
+		return 0;
+
+	eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0);
+	eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1);
+	eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0);
+	eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1);
+	eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2);
+
+	writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR,
+		       al_mc->mmio_base + AL_MC_ECC_CLEAR);
+
+	dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n",
+		eccuaddr0, eccuaddr1);
+
+	rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0);
+	row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0);
+
+	bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1);
+	bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1);
+	column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1);
+
+	prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED,
+		    rank, row, bg, bank, column,
+		    eccusyn0, eccusyn1, eccusyn2);
+
+	spin_lock_irqsave(&al_mc->lock, flags);
+	edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
+			     ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg);
+	spin_unlock_irqrestore(&al_mc->lock, flags);
+
+	return ue_count;
+}
+
+static void al_mc_edac_check(struct mem_ctl_info *mci)
+{
+	struct al_mc_edac *al_mc = mci->pvt_info;
+
+	if (al_mc->irq_ue <= 0)
+		handle_ue(mci);
+
+	if (al_mc->irq_ce <= 0)
+		handle_ce(mci);
+}
+
+static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info)
+{
+	struct platform_device *pdev = info;
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+	if (handle_ue(mci))
+		return IRQ_HANDLED;
+	return IRQ_NONE;
+}
+
+static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info)
+{
+	struct platform_device *pdev = info;
+	struct mem_ctl_info *mci = platform_get_drvdata(pdev);
+
+	if (handle_ce(mci))
+		return IRQ_HANDLED;
+	return IRQ_NONE;
+}
+
+static enum scrub_type get_scrub_mode(void __iomem *mmio_base)
+{
+	u32 ecccfg0;
+
+	ecccfg0 = readl(mmio_base + AL_MC_ECC_CFG);
+
+	if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0))
+		return SCRUB_NONE;
+	else
+		return SCRUB_HW_SRC;
+}
+
+static void devm_al_mc_edac_free(void *data)
+{
+	edac_mc_free(data);
+}
+
+static void devm_al_mc_edac_del(void *data)
+{
+	edac_mc_del_mc(data);
+}
+
+static int al_mc_edac_probe(struct platform_device *pdev)
+{
+	struct edac_mc_layer layers[1];
+	struct mem_ctl_info *mci;
+	struct al_mc_edac *al_mc;
+	void __iomem *mmio_base;
+	struct dimm_info *dimm;
+	int ret;
+
+	mmio_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(mmio_base)) {
+		dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
+			PTR_ERR(mmio_base));
+		return PTR_ERR(mmio_base);
+	}
+
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = false;
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+			    sizeof(struct al_mc_edac));
+	if (!mci)
+		return -ENOMEM;
+
+	ret = devm_add_action(&pdev->dev, devm_al_mc_edac_free, mci);
+	if (ret) {
+		edac_mc_free(mci);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, mci);
+	al_mc = mci->pvt_info;
+
+	al_mc->mmio_base = mmio_base;
+
+	al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue");
+	if (al_mc->irq_ue <= 0)
+		dev_dbg(&pdev->dev,
+			"no IRQ defined for UE - falling back to polling\n");
+
+	al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce");
+	if (al_mc->irq_ce <= 0)
+		dev_dbg(&pdev->dev,
+			"no IRQ defined for CE - falling back to polling\n");
+
+	/*
+	 * In case both interrupts (ue/ce) are to be found, use interrupt mode.
+	 * In case none of the interrupt are foud, use polling mode.
+	 * In case only one interrupt is found, use interrupt mode for it but
+	 * keep polling mode enable for the other.
+	 */
+	if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) {
+		edac_op_state = EDAC_OPSTATE_POLL;
+		mci->edac_check = al_mc_edac_check;
+	} else {
+		edac_op_state = EDAC_OPSTATE_INT;
+	}
+
+	spin_lock_init(&al_mc->lock);
+
+	mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->mod_name = DRV_NAME;
+	mci->ctl_name = "al_mc";
+	mci->pdev = &pdev->dev;
+	mci->scrub_mode = get_scrub_mode(mmio_base);
+
+	dimm = *mci->dimms;
+	dimm->grain = 1;
+
+	ret = edac_mc_add_mc(mci);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"fail to add memory controller device (%d)\n",
+			ret);
+		return ret;
+	}
+
+	ret = devm_add_action(&pdev->dev, devm_al_mc_edac_del, &pdev->dev);
+	if (ret) {
+		edac_mc_del_mc(&pdev->dev);
+		return ret;
+	}
+
+	if (al_mc->irq_ue > 0) {
+		ret = devm_request_irq(&pdev->dev,
+				       al_mc->irq_ue,
+				       al_mc_edac_irq_handler_ue,
+				       IRQF_SHARED,
+				       pdev->name,
+				       pdev);
+		if (ret != 0) {
+			dev_err(&pdev->dev,
+				"failed to request UE IRQ %d (%d)\n",
+				al_mc->irq_ue, ret);
+			return ret;
+		}
+	}
+
+	if (al_mc->irq_ce > 0) {
+		ret = devm_request_irq(&pdev->dev,
+				       al_mc->irq_ce,
+				       al_mc_edac_irq_handler_ce,
+				       IRQF_SHARED,
+				       pdev->name,
+				       pdev);
+		if (ret != 0) {
+			dev_err(&pdev->dev,
+				"failed to request CE IRQ %d (%d)\n",
+				al_mc->irq_ce, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static const struct of_device_id al_mc_edac_of_match[] = {
+	{ .compatible = "amazon,al-mc-edac", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, al_mc_edac_of_match);
+
+static struct platform_driver al_mc_edac_driver = {
+	.probe = al_mc_edac_probe,
+	.driver = {
+		.name = DRV_NAME,
+		.of_match_table = al_mc_edac_of_match,
+	},
+};
+
+module_platform_driver(al_mc_edac_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Talel Shenhar");
+MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver");

From bd17e0b7714fb7e13f340965470bf5cada535f76 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 14 Jul 2020 22:23:08 +0800
Subject: [PATCH 005/262] EDAC/thunderx: Make symbol lmc_dfs_ents static

Symbol 'lmc_dfs_ents' is not used outside of thunderx_edac.c, so
make it static:

  drivers/edac/thunderx_edac.c:457:22: warning:
   symbol 'lmc_dfs_ents' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Robert Richter <rrichter@marvell.com>
Link: https://lkml.kernel.org/r/20200714142308.46612-1-weiyongjun1@huawei.com
---
 drivers/edac/thunderx_edac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
index 4af9744cc6d021..0eb5eb97fd7427 100644
--- a/drivers/edac/thunderx_edac.c
+++ b/drivers/edac/thunderx_edac.c
@@ -454,7 +454,7 @@ DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
 DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
 DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
 
-struct debugfs_entry *lmc_dfs_ents[] = {
+static struct debugfs_entry *lmc_dfs_ents[] = {
 	&debugfs_mask0,
 	&debugfs_mask2,
 	&debugfs_parity_test,

From 8c91b81933d35ae4c3bf524b03595e9ff2198907 Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen@kernel.org>
Date: Wed, 29 Jul 2020 12:45:11 -0500
Subject: [PATCH 006/262] EDAC/socfpga: Transfer SoCFPGA EDAC maintainership

Thor Thayer is leaving Intel and will no longer be able to maintain the
EDAC for SoCFPGA driver, thus transfer maintainership to Dinh Nguyen.

Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Thor Thayer <thor.thayer@linux.intel.com>
Link: https://lkml.kernel.org/r/20200729174511.4256-1-dinguyen@kernel.org
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index deaafb617361c7..75d5e0ae4d6eaf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2505,7 +2505,7 @@ S:	Maintained
 F:	drivers/clk/socfpga/
 
 ARM/SOCFPGA EDAC SUPPORT
-M:	Thor Thayer <thor.thayer@linux.intel.com>
+M:	Dinh Nguyen <dinguyen@kernel.org>
 S:	Maintained
 F:	drivers/edac/altera_edac.
 

From bf9c912f9a649776c2d741310486a6984edaac72 Mon Sep 17 00:00:00 2001
From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Date: Thu, 6 Aug 2020 20:28:33 -0700
Subject: [PATCH 007/262] x86/cpu: Use SERIALIZE in sync_core() when available

The SERIALIZE instruction gives software a way to force the processor to
complete all modifications to flags, registers and memory from previous
instructions and drain all buffered writes to memory before the next
instruction is fetched and executed. Thus, it serves the purpose of
sync_core(). Use it when available.

Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20200807032833.17484-1-ricardo.neri-calderon@linux.intel.com
---
 arch/x86/include/asm/special_insns.h |  6 ++++++
 arch/x86/include/asm/sync_core.h     | 26 ++++++++++++++++++--------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 59a3e13204c348..5999b0b3dd4a87 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -234,6 +234,12 @@ static inline void clwb(volatile void *__p)
 
 #define nop() asm volatile ("nop")
 
+static inline void serialize(void)
+{
+	/* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */
+	asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_SPECIAL_INSNS_H */
diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index fdb5b356e59b07..4631c0f969d419 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -5,6 +5,7 @@
 #include <linux/preempt.h>
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
+#include <asm/special_insns.h>
 
 #ifdef CONFIG_X86_32
 static inline void iret_to_self(void)
@@ -54,14 +55,23 @@ static inline void iret_to_self(void)
 static inline void sync_core(void)
 {
 	/*
-	 * There are quite a few ways to do this.  IRET-to-self is nice
-	 * because it works on every CPU, at any CPL (so it's compatible
-	 * with paravirtualization), and it never exits to a hypervisor.
-	 * The only down sides are that it's a bit slow (it seems to be
-	 * a bit more than 2x slower than the fastest options) and that
-	 * it unmasks NMIs.  The "push %cs" is needed because, in
-	 * paravirtual environments, __KERNEL_CS may not be a valid CS
-	 * value when we do IRET directly.
+	 * The SERIALIZE instruction is the most straightforward way to
+	 * do this but it not universally available.
+	 */
+	if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
+		serialize();
+		return;
+	}
+
+	/*
+	 * For all other processors, there are quite a few ways to do this.
+	 * IRET-to-self is nice because it works on every CPU, at any CPL
+	 * (so it's compatible with paravirtualization), and it never exits
+	 * to a hypervisor. The only down sides are that it's a bit slow
+	 * (it seems to be a bit more than 2x slower than the fastest
+	 * options) and that it unmasks NMIs.  The "push %cs" is needed
+	 * because, in paravirtual environments, __KERNEL_CS may not be a
+	 * valid CS value when we do IRET directly.
 	 *
 	 * In case NMI unmasking or performance ever becomes a problem,
 	 * the next best option appears to be MOV-to-CR2 and an

From 86109813990b5d6d6cfb8072382ee69d11ea9460 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Tue, 7 Jul 2020 19:47:22 +0200
Subject: [PATCH 008/262] x86/cpu: Use XGETBV and XSETBV mnemonics in
 fpu/internal.h

Current minimum required version of binutils is 2.23, which supports
XGETBV and XSETBV instruction mnemonics.

Replace the byte-wise specification of XGETBV and XSETBV with these
proper mnemonics.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200707174722.58651-1-ubizjak@gmail.com
---
 arch/x86/include/asm/fpu/internal.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0a460f2a3f9041..21a8b52594774d 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -602,9 +602,7 @@ static inline u64 xgetbv(u32 index)
 {
 	u32 eax, edx;
 
-	asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
-		     : "=a" (eax), "=d" (edx)
-		     : "c" (index));
+	asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
 	return eax + ((u64)edx << 32);
 }
 
@@ -613,8 +611,7 @@ static inline void xsetbv(u32 index, u64 value)
 	u32 eax = value;
 	u32 edx = value >> 32;
 
-	asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
-		     : : "a" (eax), "d" (edx), "c" (index));
+	asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
 }
 
 #endif /* _ASM_X86_FPU_INTERNAL_H */

From abe8f12b44250d02937665033a8b750c1bfeb26e Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jul 2020 16:39:20 +0000
Subject: [PATCH 009/262] x86/resctrl: Remove unused struct
 mbm_state::chunks_bw

Nothing reads struct mbm_states's chunks_bw value, its a copy of
chunks. Remove it.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20200708163929.2783-2-james.morse@arm.com
---
 arch/x86/kernel/cpu/resctrl/internal.h | 2 --
 arch/x86/kernel/cpu/resctrl/monitor.c  | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 5ffa32256b3b26..72bb210684667d 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -283,7 +283,6 @@ struct rftype {
  * struct mbm_state - status for each MBM counter in each domain
  * @chunks:	Total data moved (multiply by rdt_group.mon_scale to get bytes)
  * @prev_msr	Value of IA32_QM_CTR for this RMID last time we read it
- * @chunks_bw	Total local data moved. Used for bandwidth calculation
  * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
  * @prev_bw	The most recent bandwidth in MBps
  * @delta_bw	Difference between the current and previous bandwidth
@@ -292,7 +291,6 @@ struct rftype {
 struct mbm_state {
 	u64	chunks;
 	u64	prev_msr;
-	u64	chunks_bw;
 	u64	prev_bw_msr;
 	u32	prev_bw;
 	u32	delta_bw;
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 837d7d012b7b14..d6b92d7487a738 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -279,8 +279,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
 		return;
 
 	chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
-	m->chunks_bw += chunks;
-	m->chunks = m->chunks_bw;
+	m->chunks += chunks;
 	cur_bw = (chunks * r->mon_scale) >> 20;
 
 	if (m->delta_comp)

From e89f85b9171665c917dca59920884f3d4fe0b1ef Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jul 2020 16:39:21 +0000
Subject: [PATCH 010/262] x86/resctrl: Remove struct rdt_membw::max_delay

max_delay is used by x86's __get_mem_config_intel() as a local variable.
Remove it, replacing it with a local variable.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20200708163929.2783-3-james.morse@arm.com
---
 arch/x86/kernel/cpu/resctrl/core.c     | 8 ++++----
 arch/x86/kernel/cpu/resctrl/internal.h | 3 ---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 6a9df71c1b9eae..9225ee5cca6fee 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -254,16 +254,16 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
 {
 	union cpuid_0x10_3_eax eax;
 	union cpuid_0x10_x_edx edx;
-	u32 ebx, ecx;
+	u32 ebx, ecx, max_delay;
 
 	cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
 	r->num_closid = edx.split.cos_max + 1;
-	r->membw.max_delay = eax.split.max_delay + 1;
+	max_delay = eax.split.max_delay + 1;
 	r->default_ctrl = MAX_MBA_BW;
 	if (ecx & MBA_IS_LINEAR) {
 		r->membw.delay_linear = true;
-		r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
-		r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
+		r->membw.min_bw = MAX_MBA_BW - max_delay;
+		r->membw.bw_gran = MAX_MBA_BW - max_delay;
 	} else {
 		if (!rdt_get_mb_table(r))
 			return false;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 72bb210684667d..1eb39bd24990d5 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -369,8 +369,6 @@ struct rdt_cache {
 
 /**
  * struct rdt_membw - Memory bandwidth allocation related data
- * @max_delay:		Max throttle delay. Delay is the hardware
- *			representation for memory bandwidth.
  * @min_bw:		Minimum memory bandwidth percentage user can request
  * @bw_gran:		Granularity at which the memory bandwidth is allocated
  * @delay_linear:	True if memory B/W delay is in linear scale
@@ -378,7 +376,6 @@ struct rdt_cache {
  * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
  */
 struct rdt_membw {
-	u32		max_delay;
 	u32		min_bw;
 	u32		bw_gran;
 	u32		delay_linear;

From ae0fbedd2a18cd82a2c0c5605a0a60865bc54576 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jul 2020 16:39:22 +0000
Subject: [PATCH 011/262] x86/resctrl: Fix stale comment

The comment in rdtgroup_init() refers to the non existent function
rdt_mount(), which has now been renamed rdt_get_tree(). Fix the
comment.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20200708163929.2783-4-james.morse@arm.com
---
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 3f844f14fc0a63..b0446176325643 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3196,7 +3196,7 @@ int __init rdtgroup_init(void)
 	 * It may also be ok since that would enable debugging of RDT before
 	 * resctrl is mounted.
 	 * The reason why the debugfs directory is created here and not in
-	 * rdt_mount() is because rdt_mount() takes rdtgroup_mutex and
+	 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
 	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
 	 * (the lockdep class of inode->i_rwsem). Other filesystem
 	 * interactions (eg. SyS_getdents) have the lock ordering:

From f995801ba3a0660cb352c8beb794379c82781ca3 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jul 2020 16:39:23 +0000
Subject: [PATCH 012/262] x86/resctrl: Use container_of() in delayed_work
 handlers

mbm_handle_overflow() and cqm_handle_limbo() are both provided with
the domain's work_struct when called, but use get_domain_from_cpu()
to find the domain, along with the appropriate error handling.

container_of() saves some list walking and bitmap testing, use that
instead.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20200708163929.2783-5-james.morse@arm.com
---
 arch/x86/kernel/cpu/resctrl/monitor.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index d6b92d7487a738..54dffe574e67ac 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -477,19 +477,13 @@ void cqm_handle_limbo(struct work_struct *work)
 	mutex_lock(&rdtgroup_mutex);
 
 	r = &rdt_resources_all[RDT_RESOURCE_L3];
-	d = get_domain_from_cpu(cpu, r);
-
-	if (!d) {
-		pr_warn_once("Failure to get domain for limbo worker\n");
-		goto out_unlock;
-	}
+	d = container_of(work, struct rdt_domain, cqm_limbo.work);
 
 	__check_limbo(d, false);
 
 	if (has_busy_rmid(r, d))
 		schedule_delayed_work_on(cpu, &d->cqm_limbo, delay);
 
-out_unlock:
 	mutex_unlock(&rdtgroup_mutex);
 }
 
@@ -519,10 +513,7 @@ void mbm_handle_overflow(struct work_struct *work)
 		goto out_unlock;
 
 	r = &rdt_resources_all[RDT_RESOURCE_L3];
-
-	d = get_domain_from_cpu(cpu, r);
-	if (!d)
-		goto out_unlock;
+	d = container_of(work, struct rdt_domain, mbm_over.work);
 
 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
 		mbm_update(r, d, prgrp->mon.rmid);

From a21a4391f20c0ab45db452e22bc3e8afe8b36e46 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jul 2020 16:39:24 +0000
Subject: [PATCH 013/262] x86/resctrl: Include pid.h

We are about to disturb the header soup. This header uses struct pid
and struct pid_namespace. Include their header.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20200708163929.2783-6-james.morse@arm.com
---
 include/linux/resctrl.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index daf5cf64c6a683..9b05af9b3e28d5 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -2,6 +2,8 @@
 #ifndef _RESCTRL_H
 #define _RESCTRL_H
 
+#include <linux/pid.h>
+
 #ifdef CONFIG_PROC_CPU_RESCTRL
 
 int proc_resctrl_show(struct seq_file *m,

From e6b2fac36fcc0b73cbef063d700a9841850e37a0 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jul 2020 16:39:25 +0000
Subject: [PATCH 014/262] x86/resctrl: Use is_closid_match() in more places

rdtgroup_tasks_assigned() and show_rdt_tasks() loop over threads testing
for a CTRL/MON group match by closid/rmid with the provided rdtgrp.
Further down the file are helpers to do this, move these further up and
make use of them here.

These helpers additionally check for alloc/mon capable. This is harmless
as rdtgroup_mkdir() tests these capable flags before allowing the config
directories to be created.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20200708163929.2783-7-james.morse@arm.com
---
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 30 ++++++++++++--------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index b0446176325643..78f3be10e215e9 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -592,6 +592,18 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
 	return ret;
 }
 
+static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
+{
+	return (rdt_alloc_capable &&
+	       (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
+}
+
+static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
+{
+	return (rdt_mon_capable &&
+	       (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
+}
+
 /**
  * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
  * @r: Resource group
@@ -607,8 +619,7 @@ int rdtgroup_tasks_assigned(struct rdtgroup *r)
 
 	rcu_read_lock();
 	for_each_process_thread(p, t) {
-		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
-		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) {
+		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
 			ret = 1;
 			break;
 		}
@@ -706,8 +717,7 @@ static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
 
 	rcu_read_lock();
 	for_each_process_thread(p, t) {
-		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
-		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
+		if (is_closid_match(t, r) || is_rmid_match(t, r))
 			seq_printf(s, "%d\n", t->pid);
 	}
 	rcu_read_unlock();
@@ -2245,18 +2255,6 @@ static int reset_all_ctrls(struct rdt_resource *r)
 	return 0;
 }
 
-static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
-{
-	return (rdt_alloc_capable &&
-		(r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
-}
-
-static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
-{
-	return (rdt_mon_capable &&
-		(r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
-}
-
 /*
  * Move tasks from one to the other group. If @from is NULL, then all tasks
  * in the systems are moved unconditionally (used for teardown).

From 41215b7947f1b1b86fd77a7bebd2320599aea7bd Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jul 2020 16:39:26 +0000
Subject: [PATCH 015/262] x86/resctrl: Add struct rdt_membw::arch_needs_linear
 to explain AMD/Intel MBA difference

The configuration values user-space provides to the resctrl filesystem
are ABI. To make this work on another architecture, all the ABI bits
should be moved out of /arch/x86 and under /fs.

To do this, the differences between AMD and Intel CPUs needs to be
explained to resctrl via resource properties, instead of function
pointers that let the arch code accept subtly different values on
different platforms/architectures.

For MBA, Intel CPUs reject configuration attempts for non-linear
resources, whereas AMD ignore this field as its MBA resource is never
linear. To merge the parse/validate functions, this difference needs to
be explained.

Add struct rdt_membw::arch_needs_linear to indicate the arch code needs
the linear property to be true to configure this resource. AMD can set
this and delay_linear to false. Intel can set arch_needs_linear to
true to keep the existing "No support for non-linear MB domains" error
message for affected platforms.

 [ bp: convert "we" etc to passive voice. ]

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Reviewed-by: Babu Moger <babu.moger@amd.com>
Link: https://lkml.kernel.org/r/20200708163929.2783-8-james.morse@arm.com
---
 arch/x86/kernel/cpu/resctrl/core.c        | 3 +++
 arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 8 +++++++-
 arch/x86/kernel/cpu/resctrl/internal.h    | 2 ++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 9225ee5cca6fee..52b8991de8a31f 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -260,6 +260,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
 	r->num_closid = edx.split.cos_max + 1;
 	max_delay = eax.split.max_delay + 1;
 	r->default_ctrl = MAX_MBA_BW;
+	r->membw.arch_needs_linear = true;
 	if (ecx & MBA_IS_LINEAR) {
 		r->membw.delay_linear = true;
 		r->membw.min_bw = MAX_MBA_BW - max_delay;
@@ -267,6 +268,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
 	} else {
 		if (!rdt_get_mb_table(r))
 			return false;
+		r->membw.arch_needs_linear = false;
 	}
 	r->data_width = 3;
 
@@ -288,6 +290,7 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
 
 	/* AMD does not use delay */
 	r->membw.delay_linear = false;
+	r->membw.arch_needs_linear = false;
 
 	r->membw.min_bw = 0;
 	r->membw.bw_gran = 1;
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 934c8fb8a64a7d..e3bcd77add2bf5 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -33,6 +33,12 @@ static bool bw_validate_amd(char *buf, unsigned long *data,
 	unsigned long bw;
 	int ret;
 
+	/* temporary: always false on AMD */
+	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
+		rdt_last_cmd_puts("No support for non-linear MB domains\n");
+		return false;
+	}
+
 	ret = kstrtoul(buf, 10, &bw);
 	if (ret) {
 		rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
@@ -82,7 +88,7 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
 	/*
 	 * Only linear delay values is supported for current Intel SKUs.
 	 */
-	if (!r->membw.delay_linear) {
+	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
 		rdt_last_cmd_puts("No support for non-linear MB domains\n");
 		return false;
 	}
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 1eb39bd24990d5..7b0072397a11e0 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -372,6 +372,7 @@ struct rdt_cache {
  * @min_bw:		Minimum memory bandwidth percentage user can request
  * @bw_gran:		Granularity at which the memory bandwidth is allocated
  * @delay_linear:	True if memory B/W delay is in linear scale
+ * @arch_needs_linear:	True if we can't configure non-linear resources
  * @mba_sc:		True if MBA software controller(mba_sc) is enabled
  * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
  */
@@ -379,6 +380,7 @@ struct rdt_membw {
 	u32		min_bw;
 	u32		bw_gran;
 	u32		delay_linear;
+	bool		arch_needs_linear;
 	bool		mba_sc;
 	u32		*mb_map;
 };

From 5df3ca9334d5603e4afbb95953d0affb37dcf86b Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jul 2020 16:39:27 +0000
Subject: [PATCH 016/262] x86/resctrl: Merge AMD/Intel parse_bw() calls

Now after arch_needs_linear has been added, the parse_bw() calls are
almost the same between AMD and Intel.

The difference is '!is_mba_sc()', which is not checked on AMD. This
will always be true on AMD CPUs as mba_sc cannot be enabled as
is_mba_linear() is false.

Removing this duplication means user-space visible behaviour and
error messages are not validated or generated in different places.

Reviewed-by : Babu Moger <babu.moger@amd.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20200708163929.2783-9-james.morse@arm.com
---
 arch/x86/kernel/cpu/resctrl/core.c        |  3 +-
 arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 57 +----------------------
 arch/x86/kernel/cpu/resctrl/internal.h    |  6 +--
 3 files changed, 5 insertions(+), 61 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 52b8991de8a31f..10a52d173e4fcd 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -168,6 +168,7 @@ struct rdt_resource rdt_resources_all[] = {
 		.name			= "MB",
 		.domains		= domain_init(RDT_RESOURCE_MBA),
 		.cache_level		= 3,
+		.parse_ctrlval		= parse_bw,
 		.format_str		= "%d=%*u",
 		.fflags			= RFTYPE_RES_MB,
 	},
@@ -926,7 +927,6 @@ static __init void rdt_init_res_defs_intel(void)
 		else if (r->rid == RDT_RESOURCE_MBA) {
 			r->msr_base = MSR_IA32_MBA_THRTL_BASE;
 			r->msr_update = mba_wrmsr_intel;
-			r->parse_ctrlval = parse_bw_intel;
 		}
 	}
 }
@@ -946,7 +946,6 @@ static __init void rdt_init_res_defs_amd(void)
 		else if (r->rid == RDT_RESOURCE_MBA) {
 			r->msr_base = MSR_IA32_MBA_BW_BASE;
 			r->msr_update = mba_wrmsr_amd;
-			r->parse_ctrlval = parse_bw_amd;
 		}
 	}
 }
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index e3bcd77add2bf5..b0e24cb6f85c47 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -21,59 +21,6 @@
 #include <linux/slab.h>
 #include "internal.h"
 
-/*
- * Check whether MBA bandwidth percentage value is correct. The value is
- * checked against the minimum and maximum bandwidth values specified by
- * the hardware. The allocated bandwidth percentage is rounded to the next
- * control step available on the hardware.
- */
-static bool bw_validate_amd(char *buf, unsigned long *data,
-			    struct rdt_resource *r)
-{
-	unsigned long bw;
-	int ret;
-
-	/* temporary: always false on AMD */
-	if (!r->membw.delay_linear && r->membw.arch_needs_linear) {
-		rdt_last_cmd_puts("No support for non-linear MB domains\n");
-		return false;
-	}
-
-	ret = kstrtoul(buf, 10, &bw);
-	if (ret) {
-		rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf);
-		return false;
-	}
-
-	if (bw < r->membw.min_bw || bw > r->default_ctrl) {
-		rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
-				    r->membw.min_bw, r->default_ctrl);
-		return false;
-	}
-
-	*data = roundup(bw, (unsigned long)r->membw.bw_gran);
-	return true;
-}
-
-int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
-		 struct rdt_domain *d)
-{
-	unsigned long bw_val;
-
-	if (d->have_new_ctrl) {
-		rdt_last_cmd_printf("Duplicate domain %d\n", d->id);
-		return -EINVAL;
-	}
-
-	if (!bw_validate_amd(data->buf, &bw_val, r))
-		return -EINVAL;
-
-	d->new_ctrl = bw_val;
-	d->have_new_ctrl = true;
-
-	return 0;
-}
-
 /*
  * Check whether MBA bandwidth percentage value is correct. The value is
  * checked against the minimum and max bandwidth values specified by the
@@ -110,8 +57,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
 	return true;
 }
 
-int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
-		   struct rdt_domain *d)
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+	     struct rdt_domain *d)
 {
 	unsigned long bw_val;
 
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 7b0072397a11e0..21f43994b56d44 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -471,10 +471,8 @@ struct rdt_resource {
 
 int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
 	      struct rdt_domain *d);
-int parse_bw_intel(struct rdt_parse_data *data, struct rdt_resource *r,
-		   struct rdt_domain *d);
-int parse_bw_amd(struct rdt_parse_data *data, struct rdt_resource *r,
-		 struct rdt_domain *d);
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+	     struct rdt_domain *d);
 
 extern struct mutex rdtgroup_mutex;
 

From 40eb0cb4939e462acfedea8c8064571e886b9773 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Tue, 18 Aug 2020 07:31:30 +0200
Subject: [PATCH 017/262] x86/cpu: Fix typos and improve the comments in
 sync_core()

- Fix typos.

- Move the compiler barrier comment to the top, because it's valid for the
  whole function, not just the legacy branch.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200818053130.GA3161093@gmail.com
Reviewed-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
 arch/x86/include/asm/sync_core.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index 4631c0f969d419..0fd4a9dfb29c45 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -47,16 +47,19 @@ static inline void iret_to_self(void)
  *
  *  b) Text was modified on a different CPU, may subsequently be
  *     executed on this CPU, and you want to make sure the new version
- *     gets executed.  This generally means you're calling this in a IPI.
+ *     gets executed.  This generally means you're calling this in an IPI.
  *
  * If you're calling this for a different reason, you're probably doing
  * it wrong.
+ *
+ * Like all of Linux's memory ordering operations, this is a
+ * compiler barrier as well.
  */
 static inline void sync_core(void)
 {
 	/*
 	 * The SERIALIZE instruction is the most straightforward way to
-	 * do this but it not universally available.
+	 * do this, but it is not universally available.
 	 */
 	if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
 		serialize();
@@ -67,10 +70,10 @@ static inline void sync_core(void)
 	 * For all other processors, there are quite a few ways to do this.
 	 * IRET-to-self is nice because it works on every CPU, at any CPL
 	 * (so it's compatible with paravirtualization), and it never exits
-	 * to a hypervisor. The only down sides are that it's a bit slow
+	 * to a hypervisor.  The only downsides are that it's a bit slow
 	 * (it seems to be a bit more than 2x slower than the fastest
-	 * options) and that it unmasks NMIs.  The "push %cs" is needed
-	 * because, in paravirtual environments, __KERNEL_CS may not be a
+	 * options) and that it unmasks NMIs.  The "push %cs" is needed,
+	 * because in paravirtual environments __KERNEL_CS may not be a
 	 * valid CS value when we do IRET directly.
 	 *
 	 * In case NMI unmasking or performance ever becomes a problem,
@@ -81,9 +84,6 @@ static inline void sync_core(void)
 	 * CPUID is the conventional way, but it's nasty: it doesn't
 	 * exist on some 486-like CPUs, and it usually exits to a
 	 * hypervisor.
-	 *
-	 * Like all of Linux's memory ordering operations, this is a
-	 * compiler barrier as well.
 	 */
 	iret_to_self();
 }

From 316e7f901f5aedb415c72d1eedd7de0846238dd0 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jul 2020 16:39:28 +0000
Subject: [PATCH 018/262] x86/resctrl: Add struct rdt_cache::arch_has_{sparse,
 empty}_bitmaps

Intel CPUs expect the cache bitmap provided by user-space to have on a
single span of 1s, whereas AMD can support bitmaps like 0xf00f. Arm's
MPAM support also allows sparse bitmaps.

Similarly, Intel CPUs check at least one bit set, whereas AMD CPUs are
quite happy with an empty bitmap. Arm's MPAM allows an empty bitmap.

To move resctrl out to /fs/, platform differences like this need to be
explained.

Add two resource properties arch_has_{empty,sparse}_bitmaps. Test these
around the relevant parts of cbm_validate().

Merging the validate calls causes AMD to gain the min_cbm_bits test
needed for Haswell, but as it always sets this value to 1, it will never
match.

 [ bp: Massage commit message. ]

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Babu Moger <babu.moger@amd.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20200708163929.2783-10-james.morse@arm.com
---
 arch/x86/kernel/cpu/resctrl/core.c        | 14 ++++----
 arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 39 ++++++-----------------
 arch/x86/kernel/cpu/resctrl/internal.h    |  8 ++---
 3 files changed, 22 insertions(+), 39 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 10a52d173e4fcd..cbbd751bcc385f 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -922,9 +922,10 @@ static __init void rdt_init_res_defs_intel(void)
 		    r->rid == RDT_RESOURCE_L3CODE ||
 		    r->rid == RDT_RESOURCE_L2 ||
 		    r->rid == RDT_RESOURCE_L2DATA ||
-		    r->rid == RDT_RESOURCE_L2CODE)
-			r->cbm_validate = cbm_validate_intel;
-		else if (r->rid == RDT_RESOURCE_MBA) {
+		    r->rid == RDT_RESOURCE_L2CODE) {
+			r->cache.arch_has_sparse_bitmaps = false;
+			r->cache.arch_has_empty_bitmaps = false;
+		} else if (r->rid == RDT_RESOURCE_MBA) {
 			r->msr_base = MSR_IA32_MBA_THRTL_BASE;
 			r->msr_update = mba_wrmsr_intel;
 		}
@@ -941,9 +942,10 @@ static __init void rdt_init_res_defs_amd(void)
 		    r->rid == RDT_RESOURCE_L3CODE ||
 		    r->rid == RDT_RESOURCE_L2 ||
 		    r->rid == RDT_RESOURCE_L2DATA ||
-		    r->rid == RDT_RESOURCE_L2CODE)
-			r->cbm_validate = cbm_validate_amd;
-		else if (r->rid == RDT_RESOURCE_MBA) {
+		    r->rid == RDT_RESOURCE_L2CODE) {
+			r->cache.arch_has_sparse_bitmaps = true;
+			r->cache.arch_has_empty_bitmaps = true;
+		} else if (r->rid == RDT_RESOURCE_MBA) {
 			r->msr_base = MSR_IA32_MBA_BW_BASE;
 			r->msr_update = mba_wrmsr_amd;
 		}
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index b0e24cb6f85c47..c877642e8a1475 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -76,12 +76,14 @@ int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
 }
 
 /*
- * Check whether a cache bit mask is valid. The SDM says:
+ * Check whether a cache bit mask is valid.
+ * For Intel the SDM says:
  *	Please note that all (and only) contiguous '1' combinations
  *	are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
  * Additionally Haswell requires at least two bits set.
+ * AMD allows non-contiguous bitmasks.
  */
-bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
+static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
 {
 	unsigned long first_bit, zero_bit, val;
 	unsigned int cbm_len = r->cache.cbm_len;
@@ -93,7 +95,8 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
 		return false;
 	}
 
-	if (val == 0 || val > r->default_ctrl) {
+	if ((!r->cache.arch_has_empty_bitmaps && val == 0) ||
+	    val > r->default_ctrl) {
 		rdt_last_cmd_puts("Mask out of range\n");
 		return false;
 	}
@@ -101,7 +104,9 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
 	first_bit = find_first_bit(&val, cbm_len);
 	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
 
-	if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
+	/* Are non-contiguous bitmaps allowed? */
+	if (!r->cache.arch_has_sparse_bitmaps &&
+	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
 		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
 		return false;
 	}
@@ -116,30 +121,6 @@ bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r)
 	return true;
 }
 
-/*
- * Check whether a cache bit mask is valid. AMD allows non-contiguous
- * bitmasks
- */
-bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r)
-{
-	unsigned long val;
-	int ret;
-
-	ret = kstrtoul(buf, 16, &val);
-	if (ret) {
-		rdt_last_cmd_printf("Non-hex character in the mask %s\n", buf);
-		return false;
-	}
-
-	if (val > r->default_ctrl) {
-		rdt_last_cmd_puts("Mask out of range\n");
-		return false;
-	}
-
-	*data = val;
-	return true;
-}
-
 /*
  * Read one cache bit mask (hex). Check that it is valid for the current
  * resource type.
@@ -165,7 +146,7 @@ int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
 		return -EINVAL;
 	}
 
-	if (!r->cbm_validate(data->buf, &cbm_val, r))
+	if (!cbm_validate(data->buf, &cbm_val, r))
 		return -EINVAL;
 
 	if ((rdtgrp->mode == RDT_MODE_EXCLUSIVE ||
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 21f43994b56d44..0b4829484c9e95 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -358,6 +358,8 @@ struct msr_param {
  *			in a cache bit mask
  * @shareable_bits:	Bitmask of shareable resource with other
  *			executing entities
+ * @arch_has_sparse_bitmaps:	True if a bitmap like f00f is valid.
+ * @arch_has_empty_bitmaps:	True if the '0' bitmap is valid.
  */
 struct rdt_cache {
 	unsigned int	cbm_len;
@@ -365,6 +367,8 @@ struct rdt_cache {
 	unsigned int	cbm_idx_mult;
 	unsigned int	cbm_idx_offset;
 	unsigned int	shareable_bits;
+	bool		arch_has_sparse_bitmaps;
+	bool		arch_has_empty_bitmaps;
 };
 
 /**
@@ -434,7 +438,6 @@ struct rdt_parse_data {
  * @cache:		Cache allocation related data
  * @format_str:		Per resource format string to show domain value
  * @parse_ctrlval:	Per resource function pointer to parse control values
- * @cbm_validate	Cache bitmask validate function
  * @evt_list:		List of monitoring events
  * @num_rmid:		Number of RMIDs available
  * @mon_scale:		cqm counter * mon_scale = occupancy in bytes
@@ -461,7 +464,6 @@ struct rdt_resource {
 	int (*parse_ctrlval)(struct rdt_parse_data *data,
 			     struct rdt_resource *r,
 			     struct rdt_domain *d);
-	bool (*cbm_validate)(char *buf, u32 *data, struct rdt_resource *r);
 	struct list_head	evt_list;
 	int			num_rmid;
 	unsigned int		mon_scale;
@@ -604,8 +606,6 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
 void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
 void __check_limbo(struct rdt_domain *d, bool force_free);
-bool cbm_validate_intel(char *buf, u32 *data, struct rdt_resource *r);
-bool cbm_validate_amd(char *buf, u32 *data, struct rdt_resource *r);
 void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
 
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */

From 709c4362725abb5fa1e36fd94893a9b0d049df82 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Wed, 8 Jul 2020 16:39:29 +0000
Subject: [PATCH 019/262] cacheinfo: Move resctrl's get_cache_id() to the
 cacheinfo header file

resctrl/core.c defines get_cache_id() for use in its cpu-hotplug
callbacks. This gets the id attribute of the cache at the corresponding
level of a CPU.

Later rework means this private function needs to be shared. Move
it to the header file.

The name conflicts with a different definition in intel_cacheinfo.c,
name it get_cpu_cacheinfo_id() to show its relation with
get_cpu_cacheinfo().

Now this is visible on other architectures, check the id attribute
has actually been set.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Babu Moger <babu.moger@amd.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20200708163929.2783-11-james.morse@arm.com
---
 arch/x86/kernel/cpu/resctrl/core.c | 17 ++---------------
 include/linux/cacheinfo.h          | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index cbbd751bcc385f..1c00f2f0bf0c23 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -350,19 +350,6 @@ static void rdt_get_cdp_l2_config(void)
 	rdt_get_cdp_config(RDT_RESOURCE_L2, RDT_RESOURCE_L2CODE);
 }
 
-static int get_cache_id(int cpu, int level)
-{
-	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
-	int i;
-
-	for (i = 0; i < ci->num_leaves; i++) {
-		if (ci->info_list[i].level == level)
-			return ci->info_list[i].id;
-	}
-
-	return -1;
-}
-
 static void
 mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
 {
@@ -560,7 +547,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
  */
 static void domain_add_cpu(int cpu, struct rdt_resource *r)
 {
-	int id = get_cache_id(cpu, r->cache_level);
+	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
 	struct list_head *add_pos = NULL;
 	struct rdt_domain *d;
 
@@ -606,7 +593,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 
 static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 {
-	int id = get_cache_id(cpu, r->cache_level);
+	int id = get_cpu_cacheinfo_id(cpu, r->cache_level);
 	struct rdt_domain *d;
 
 	d = rdt_find_domain(r, id, NULL);
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 46b92cd61d0c82..4f72b47973c304 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -3,6 +3,7 @@
 #define _LINUX_CACHEINFO_H
 
 #include <linux/bitops.h>
+#include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/smp.h>
 
@@ -119,4 +120,24 @@ int acpi_find_last_cache_level(unsigned int cpu);
 
 const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf);
 
+/*
+ * Get the id of the cache associated with @cpu at level @level.
+ * cpuhp lock must be held.
+ */
+static inline int get_cpu_cacheinfo_id(int cpu, int level)
+{
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
+	int i;
+
+	for (i = 0; i < ci->num_leaves; i++) {
+		if (ci->info_list[i].level == level) {
+			if (ci->info_list[i].attributes & CACHE_ID)
+				return ci->info_list[i].id;
+			return -1;
+		}
+	}
+
+	return -1;
+}
+
 #endif /* _LINUX_CACHEINFO_H */

From 85e6084e0b436cabe9c909e679937998ffbf9c9d Mon Sep 17 00:00:00 2001
From: Luca Stefani <luca.stefani.ge1@gmail.com>
Date: Wed, 5 Aug 2020 11:57:08 +0200
Subject: [PATCH 020/262] RAS/CEC: Fix cec_init() prototype

late_initcall() expects a function that returns an integer. Update the
function signature to match.

 [ bp: Massage commit message into proper sentences. ]

Fixes: 9554bfe403bd ("x86/mce: Convert the CEC to use the MCE notifier")
Signed-off-by: Luca Stefani <luca.stefani.ge1@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Tested-by: Sami Tolvanen <samitolvanen@google.com>
Link: https://lkml.kernel.org/r/20200805095708.83939-1-luca.stefani.ge1@gmail.com
---
 drivers/ras/cec.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 569d9ad2c59428..6939aa5b3dc7fa 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -553,20 +553,20 @@ static struct notifier_block cec_nb = {
 	.priority	= MCE_PRIO_CEC,
 };
 
-static void __init cec_init(void)
+static int __init cec_init(void)
 {
 	if (ce_arr.disabled)
-		return;
+		return -ENODEV;
 
 	ce_arr.array = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!ce_arr.array) {
 		pr_err("Error allocating CE array page!\n");
-		return;
+		return -ENOMEM;
 	}
 
 	if (create_debugfs_nodes()) {
 		free_page((unsigned long)ce_arr.array);
-		return;
+		return -ENOMEM;
 	}
 
 	INIT_DELAYED_WORK(&cec_work, cec_work_fn);
@@ -575,6 +575,7 @@ static void __init cec_init(void)
 	mce_register_decode_chain(&cec_nb);
 
 	pr_info("Correctable Errors collector initialized.\n");
+	return 0;
 }
 late_initcall(cec_init);
 

From 368d1887200d68075c064a62a9aa191168cf1eed Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Mon, 20 Jul 2020 14:53:53 +0000
Subject: [PATCH 021/262] x86/MCE/AMD, EDAC/mce_amd: Remove struct
 smca_hwid.xec_bitmap

The Extended Error Code Bitmap (xec_bitmap) for a Scalable MCA bank type
was intended to be used by the kernel to filter out invalid error codes
on a system. However, this is unnecessary after a few product releases
because the hardware will only report valid error codes. Thus, there's
no need for it with future systems.

Remove the xec_bitmap field and all references to it.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200720145353.43924-1-Yazen.Ghannam@amd.com
---
 arch/x86/include/asm/mce.h    |  1 -
 arch/x86/kernel/cpu/mce/amd.c | 44 +++++++++++++++++------------------
 drivers/edac/mce_amd.c        |  4 +---
 3 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index cf503824529ced..6adced6e7dd357 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -328,7 +328,6 @@ enum smca_bank_types {
 struct smca_hwid {
 	unsigned int bank_type;	/* Use with smca_bank_types for easy indexing. */
 	u32 hwid_mcatype;	/* (hwid,mcatype) tuple */
-	u32 xec_bitmap;		/* Bitmap of valid ExtErrorCodes; current max is 21. */
 	u8 count;		/* Number of instances. */
 };
 
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 99be063fcb1bb1..0c6b02dd744c14 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -132,49 +132,49 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
 }
 
 static struct smca_hwid smca_hwid_mcatypes[] = {
-	/* { bank_type, hwid_mcatype, xec_bitmap } */
+	/* { bank_type, hwid_mcatype } */
 
 	/* Reserved type */
-	{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
+	{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0)	},
 
 	/* ZN Core (HWID=0xB0) MCA types */
-	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
-	{ SMCA_LS_V2,	 HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF },
-	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
-	{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
-	{ SMCA_DE,	 HWID_MCATYPE(0xB0, 0x3), 0x1FF },
+	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0)	},
+	{ SMCA_LS_V2,	 HWID_MCATYPE(0xB0, 0x10)	},
+	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1)	},
+	{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2)	},
+	{ SMCA_DE,	 HWID_MCATYPE(0xB0, 0x3)	},
 	/* HWID 0xB0 MCATYPE 0x4 is Reserved */
-	{ SMCA_EX,	 HWID_MCATYPE(0xB0, 0x5), 0xFFF },
-	{ SMCA_FP,	 HWID_MCATYPE(0xB0, 0x6), 0x7F },
-	{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
+	{ SMCA_EX,	 HWID_MCATYPE(0xB0, 0x5)	},
+	{ SMCA_FP,	 HWID_MCATYPE(0xB0, 0x6)	},
+	{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7)	},
 
 	/* Data Fabric MCA types */
-	{ SMCA_CS,	 HWID_MCATYPE(0x2E, 0x0), 0x1FF },
-	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1), 0x1F },
-	{ SMCA_CS_V2,	 HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
+	{ SMCA_CS,	 HWID_MCATYPE(0x2E, 0x0)	},
+	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1)	},
+	{ SMCA_CS_V2,	 HWID_MCATYPE(0x2E, 0x2)	},
 
 	/* Unified Memory Controller MCA type */
-	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0), 0xFF },
+	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0)	},
 
 	/* Parameter Block MCA type */
-	{ SMCA_PB,	 HWID_MCATYPE(0x05, 0x0), 0x1 },
+	{ SMCA_PB,	 HWID_MCATYPE(0x05, 0x0)	},
 
 	/* Platform Security Processor MCA type */
-	{ SMCA_PSP,	 HWID_MCATYPE(0xFF, 0x0), 0x1 },
-	{ SMCA_PSP_V2,	 HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
+	{ SMCA_PSP,	 HWID_MCATYPE(0xFF, 0x0)	},
+	{ SMCA_PSP_V2,	 HWID_MCATYPE(0xFF, 0x1)	},
 
 	/* System Management Unit MCA type */
-	{ SMCA_SMU,	 HWID_MCATYPE(0x01, 0x0), 0x1 },
-	{ SMCA_SMU_V2,	 HWID_MCATYPE(0x01, 0x1), 0x7FF },
+	{ SMCA_SMU,	 HWID_MCATYPE(0x01, 0x0)	},
+	{ SMCA_SMU_V2,	 HWID_MCATYPE(0x01, 0x1)	},
 
 	/* Microprocessor 5 Unit MCA type */
-	{ SMCA_MP5,	 HWID_MCATYPE(0x01, 0x2), 0x3FF },
+	{ SMCA_MP5,	 HWID_MCATYPE(0x01, 0x2)	},
 
 	/* Northbridge IO Unit MCA type */
-	{ SMCA_NBIO,	 HWID_MCATYPE(0x18, 0x0), 0x1F },
+	{ SMCA_NBIO,	 HWID_MCATYPE(0x18, 0x0)	},
 
 	/* PCI Express Unit MCA type */
-	{ SMCA_PCIE,	 HWID_MCATYPE(0x46, 0x0), 0x1F },
+	{ SMCA_PCIE,	 HWID_MCATYPE(0x46, 0x0)	},
 };
 
 struct smca_bank smca_banks[MAX_NR_BANKS];
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 325aedf46ff2b8..d4168c46739c7b 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -990,10 +990,8 @@ static void decode_smca_error(struct mce *m)
 	pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
 
 	/* Only print the decode of valid error codes */
-	if (xec < smca_mce_descs[bank_type].num_descs &&
-			(hwid->xec_bitmap & BIT_ULL(xec))) {
+	if (xec < smca_mce_descs[bank_type].num_descs)
 		pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
-	}
 
 	if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
 		decode_dram_ecc(cpu_to_node(m->extcpu), m);

From b91e7089ae70d2f7c81a4456e5b78fef498663d9 Mon Sep 17 00:00:00 2001
From: Brendan Shanks <bshanks@codeweavers.com>
Date: Fri, 10 Jul 2020 15:45:25 -0700
Subject: [PATCH 022/262] x86/umip: Add emulation/spoofing for SLDT and STR
 instructions

Add emulation/spoofing of SLDT and STR for both 32- and 64-bit
processes.

Wine users have found a small number of Windows apps using SLDT that
were crashing when run on UMIP-enabled systems.

Originally-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Reported-by: Andreas Rammhold <andi@notmuch.email>
Signed-off-by: Brendan Shanks <bshanks@codeweavers.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Tested-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Link: https://lkml.kernel.org/r/20200710224525.21966-1-bshanks@codeweavers.com
---
 arch/x86/kernel/umip.c | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 8d5cbe1bbb3bc0..2c304fd0bb1a2a 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -45,11 +45,12 @@
  * value that, lies close to the top of the kernel memory. The limit for the GDT
  * and the IDT are set to zero.
  *
- * Given that SLDT and STR are not commonly used in programs that run on WineHQ
- * or DOSEMU2, they are not emulated.
- *
- * The instruction smsw is emulated to return the value that the register CR0
+ * The instruction SMSW is emulated to return the value that the register CR0
  * has at boot time as set in the head_32.
+ * SLDT and STR are emulated to return the values that the kernel programmatically
+ * assigns:
+ * - SLDT returns (GDT_ENTRY_LDT * 8) if an LDT has been set, 0 if not.
+ * - STR returns (GDT_ENTRY_TSS * 8).
  *
  * Emulation is provided for both 32-bit and 64-bit processes.
  *
@@ -244,16 +245,34 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
 		*data_size += UMIP_GDT_IDT_LIMIT_SIZE;
 		memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
 
-	} else if (umip_inst == UMIP_INST_SMSW) {
-		unsigned long dummy_value = CR0_STATE;
+	} else if (umip_inst == UMIP_INST_SMSW || umip_inst == UMIP_INST_SLDT ||
+		   umip_inst == UMIP_INST_STR) {
+		unsigned long dummy_value;
+
+		if (umip_inst == UMIP_INST_SMSW) {
+			dummy_value = CR0_STATE;
+		} else if (umip_inst == UMIP_INST_STR) {
+			dummy_value = GDT_ENTRY_TSS * 8;
+		} else if (umip_inst == UMIP_INST_SLDT) {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+			down_read(&current->mm->context.ldt_usr_sem);
+			if (current->mm->context.ldt)
+				dummy_value = GDT_ENTRY_LDT * 8;
+			else
+				dummy_value = 0;
+			up_read(&current->mm->context.ldt_usr_sem);
+#else
+			dummy_value = 0;
+#endif
+		}
 
 		/*
-		 * Even though the CR0 register has 4 bytes, the number
+		 * For these 3 instructions, the number
 		 * of bytes to be copied in the result buffer is determined
 		 * by whether the operand is a register or a memory location.
 		 * If operand is a register, return as many bytes as the operand
 		 * size. If operand is memory, return only the two least
-		 * siginificant bytes of CR0.
+		 * siginificant bytes.
 		 */
 		if (X86_MODRM_MOD(insn->modrm.value) == 3)
 			*data_size = insn->opnd_bytes;
@@ -261,7 +280,6 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
 			*data_size = 2;
 
 		memcpy(data, &dummy_value, *data_size);
-	/* STR and SLDT  are not emulated */
 	} else {
 		return -EINVAL;
 	}
@@ -383,10 +401,6 @@ bool fixup_umip_exception(struct pt_regs *regs)
 	umip_pr_warn(regs, "%s instruction cannot be used by applications.\n",
 			umip_insns[umip_inst]);
 
-	/* Do not emulate (spoof) SLDT or STR. */
-	if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT)
-		return false;
-
 	umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n");
 
 	if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,

From 1f35c9c0ce3888405fc813afedaff79de433cf27 Mon Sep 17 00:00:00 2001
From: Chris Down <chris@chrisdown.name>
Date: Fri, 21 Aug 2020 13:10:24 +0100
Subject: [PATCH 023/262] x86/msr: Prevent userspace MSR access from dominating
 the console

Applications which manipulate MSRs from userspace often do so
infrequently, and all at once. As such, the default printk ratelimit
architecture supplied by pr_err_ratelimited() doesn't do enough to prevent
kmsg becoming completely overwhelmed with their messages and pushing
other salient information out of the circular buffer.

In one case, I saw over 80% of kmsg being filled with these messages,
and the default kmsg buffer being completely filled less than 5 minutes
after boot(!).

Make things much less aggressive, while still achieving the original
goal of fiter_write(). Operators will still get warnings that MSRs are
being manipulated from userspace, but they won't have other also
potentially useful messages pushed out of the kmsg buffer.

Of course, one can boot with `allow_writes=1` to avoid these messages at
all, but that then has the downfall that one doesn't get _any_
notification at all about these problems in the first place, and so is
much less likely to forget to fix it.

One might rather it was less binary: it was still logged, just less
often, so that application developers _do_ have the incentive to improve
their current methods, without the kernel having to push other useful
stuff out of the kmsg buffer.

This one example isn't the point, of course: I'm sure there are plenty
of other non-ideal-but-pragmatic cases where people are writing to MSRs
from userspace right now, and it will take time for those people to find
other solutions.

Overall, keep the intent of the original patch, while mitigating its
sometimes heavy effects on kmsg composition.

 [ bp: Massage a bit. ]

Signed-off-by: Chris Down <chris@chrisdown.name>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/563994ef132ce6cffd28fc659254ca37d032b5ef.1598011595.git.chris@chrisdown.name
---
 arch/x86/kernel/msr.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 49dcfb85e7730d..b03001dfc6134d 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -80,18 +80,30 @@ static ssize_t msr_read(struct file *file, char __user *buf,
 
 static int filter_write(u32 reg)
 {
+	/*
+	 * MSRs writes usually happen all at once, and can easily saturate kmsg.
+	 * Only allow one message every 30 seconds.
+	 *
+	 * It's possible to be smarter here and do it (for example) per-MSR, but
+	 * it would certainly be more complex, and this is enough at least to
+	 * avoid saturating the ring buffer.
+	 */
+	static DEFINE_RATELIMIT_STATE(fw_rs, 30 * HZ, 1);
+
 	switch (allow_writes) {
 	case MSR_WRITES_ON:  return 0;
 	case MSR_WRITES_OFF: return -EPERM;
 	default: break;
 	}
 
+	if (!__ratelimit(&fw_rs))
+		return 0;
+
 	if (reg == MSR_IA32_ENERGY_PERF_BIAS)
 		return 0;
 
-	pr_err_ratelimited("Write to unrecognized MSR 0x%x by %s\n"
-			   "Please report to x86@kernel.org\n",
-			   reg, current->comm);
+	pr_err("Write to unrecognized MSR 0x%x by %s\n"
+	       "Please report to x86@kernel.org\n", reg, current->comm);
 
 	return 0;
 }

From c31feed8461fb8648075ba9b53d9e527d530972f Mon Sep 17 00:00:00 2001
From: Chris Down <chris@chrisdown.name>
Date: Fri, 21 Aug 2020 13:10:35 +0100
Subject: [PATCH 024/262] x86/msr: Make source of unrecognised MSR writes
 unambiguous

In many cases, task_struct.comm isn't enough to distinguish the
offender, since for interpreted languages it's likely just going to be
"python3" or whatever. Add the pid to make it unambiguous.

 [ bp: Make the printk string a single line for easier grepping. ]

Signed-off-by: Chris Down <chris@chrisdown.name>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/6f6fbd0ee6c99bc5e47910db700a6642159db01b.1598011595.git.chris@chrisdown.name
---
 arch/x86/kernel/msr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index b03001dfc6134d..c0d40981065891 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -102,8 +102,8 @@ static int filter_write(u32 reg)
 	if (reg == MSR_IA32_ENERGY_PERF_BIAS)
 		return 0;
 
-	pr_err("Write to unrecognized MSR 0x%x by %s\n"
-	       "Please report to x86@kernel.org\n", reg, current->comm);
+	pr_err("Write to unrecognized MSR 0x%x by %s (pid: %d). Please report to x86@kernel.org.\n",
+	       reg, current->comm, current->pid);
 
 	return 0;
 }

From 0b2c605fa4ee3117c00b97b7af67791576b28f88 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 20 Aug 2020 11:10:15 +0200
Subject: [PATCH 025/262] x86/entry/64: Correct the comment over
 SAVE_AND_SET_GSBASE

Add the proper explanation why an LFENCE is not needed in the FSGSBASE
case.

Fixes: c82965f9e530 ("x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit")
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200821090710.GE12181@zn.tnic
---
 arch/x86/entry/entry_64.S | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 70dea933781627..bf78de4de7f0be 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -840,8 +840,9 @@ SYM_CODE_START_LOCAL(paranoid_entry)
 	 * retrieve and set the current CPUs kernel GSBASE. The stored value
 	 * has to be restored in paranoid_exit unconditionally.
 	 *
-	 * The MSR write ensures that no subsequent load is based on a
-	 * mispredicted GSBASE. No extra FENCE required.
+	 * The unconditional write to GS base below ensures that no subsequent
+	 * loads based on a mispredicted GS base can happen, therefore no LFENCE
+	 * is needed here.
 	 */
 	SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
 	ret

From b5fb51340f07a65932af0df82d11db06478439f7 Mon Sep 17 00:00:00 2001
From: Robert Richter <rric@kernel.org>
Date: Mon, 24 Aug 2020 14:49:31 +0200
Subject: [PATCH 026/262] EDAC/highbank: Handover Calxeda Highbank maintenance
 to Andre Przywara

I do not have hardware anymore, nor there is ongoing development. So
handover maintenance to Andre who already maintains the last remainings
of Calxeda.

Signed-off-by: Robert Richter <rric@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Andre Przywara <andre.przywara@arm.com>
Link: https://lkml.kernel.org/r/20200824124931.2933-1-rric@kernel.org
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 75d5e0ae4d6eaf..2ee9959967140e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6166,7 +6166,7 @@ S:	Supported
 F:	drivers/edac/bluefield_edac.c
 
 EDAC-CALXEDA
-M:	Robert Richter <rric@kernel.org>
+M:	Andre Przywara <andre.przywara@arm.com>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/highbank*

From 5f1dd4dda5c8796c405e856aaa11e187f6885924 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Tue, 18 Aug 2020 12:28:31 +0200
Subject: [PATCH 027/262] x86/fsgsbase: Replace static_cpu_has() with
 boot_cpu_has()

ptrace and prctl() are not really fast paths to warrant the use of
static_cpu_has() and cause alternatives patching for no good reason.
Replace with boot_cpu_has() which is simple and fast enough.

No functional changes.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200818103715.32736-1-bp@alien8.de
---
 arch/x86/include/asm/fsgsbase.h | 4 ++--
 arch/x86/kernel/process_64.c    | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index d552646411a9d5..35cff5f2becf6b 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -57,7 +57,7 @@ static inline unsigned long x86_fsbase_read_cpu(void)
 {
 	unsigned long fsbase;
 
-	if (static_cpu_has(X86_FEATURE_FSGSBASE))
+	if (boot_cpu_has(X86_FEATURE_FSGSBASE))
 		fsbase = rdfsbase();
 	else
 		rdmsrl(MSR_FS_BASE, fsbase);
@@ -67,7 +67,7 @@ static inline unsigned long x86_fsbase_read_cpu(void)
 
 static inline void x86_fsbase_write_cpu(unsigned long fsbase)
 {
-	if (static_cpu_has(X86_FEATURE_FSGSBASE))
+	if (boot_cpu_has(X86_FEATURE_FSGSBASE))
 		wrfsbase(fsbase);
 	else
 		wrmsrl(MSR_FS_BASE, fsbase);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9afefe325acb1f..df342bedea88af 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -407,7 +407,7 @@ unsigned long x86_gsbase_read_cpu_inactive(void)
 {
 	unsigned long gsbase;
 
-	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+	if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
 		unsigned long flags;
 
 		local_irq_save(flags);
@@ -422,7 +422,7 @@ unsigned long x86_gsbase_read_cpu_inactive(void)
 
 void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
 {
-	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+	if (boot_cpu_has(X86_FEATURE_FSGSBASE)) {
 		unsigned long flags;
 
 		local_irq_save(flags);
@@ -439,7 +439,7 @@ unsigned long x86_fsbase_read_task(struct task_struct *task)
 
 	if (task == current)
 		fsbase = x86_fsbase_read_cpu();
-	else if (static_cpu_has(X86_FEATURE_FSGSBASE) ||
+	else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
 		 (task->thread.fsindex == 0))
 		fsbase = task->thread.fsbase;
 	else
@@ -454,7 +454,7 @@ unsigned long x86_gsbase_read_task(struct task_struct *task)
 
 	if (task == current)
 		gsbase = x86_gsbase_read_cpu_inactive();
-	else if (static_cpu_has(X86_FEATURE_FSGSBASE) ||
+	else if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
 		 (task->thread.gsindex == 0))
 		gsbase = task->thread.gsbase;
 	else

From 3b0950af21e02a8c902ce1143ac5563fadcdfb3e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 12 Jan 2020 18:17:05 +0100
Subject: [PATCH 028/262] m68k: amiga: Fix Denise detection on OCS

The "default" statement for detecting an original Denise chip seems to
be misplaced, causing original Denise chips not being detected.

Fix this by moving it from the outer to the inner "switch" statement.

Fortunately no code relies on this, but the detected version is printed
during boot, and available through /proc/hardware.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20200112171705.22600-1-geert@linux-m68k.org
---
 arch/m68k/amiga/config.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 8f23b2fab64c7a..9c1afa143d0596 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -278,11 +278,11 @@ static void __init amiga_identify(void)
 			case 0x08:
 				AMIGAHW_SET(LISA);
 				break;
+			default:
+				AMIGAHW_SET(DENISE);
+				break;
 			}
 			break;
-		default:
-			AMIGAHW_SET(DENISE);
-			break;
 		}
 		switch ((amiga_custom.vposr>>8) & 0x7f) {
 		case 0x00:

From ea2abe2fd59a73b26c01e9a3bb83efc46e44face Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 13 Jul 2020 09:24:29 +0200
Subject: [PATCH 029/262] zorro: Fix address space collision message with RAM
 expansion boards

When booting Linux on an Amiga with BigRAMPlus Zorro expansion board:

    zorro: Address space collision on device Zorro device 12128600 (Individual Computers) [??? 0x50000000-]

This happens because the address space occupied by the BigRAMPlus Zorro
device is already in use, as it is part of system RAM.  Hence the
message is harmless.

Zorro memory expansion boards have the ERTF_MEMLIST flag set, which
tells AmigaOS to link the board's RAM into the free memory list.  While
we could skip registering the board resource if this flag is set, that
may cause issues with Zorro II RAM excluded in a memfile.

Hence fix the issue by just ignoring the error if ERTF_MEMLIST is set.

Reported-by: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20200713072429.6182-1-geert@linux-m68k.org
---
 drivers/zorro/zorro.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c
index 47c733817903f3..1b992864858319 100644
--- a/drivers/zorro/zorro.c
+++ b/drivers/zorro/zorro.c
@@ -181,7 +181,7 @@ static int __init amiga_zorro_probe(struct platform_device *pdev)
 		z->resource.name = z->name;
 		r = zorro_find_parent_resource(pdev, z);
 		error = request_resource(r, &z->resource);
-		if (error)
+		if (error && !(z->rom.er_Type & ERTF_MEMLIST))
 			dev_err(&bus->dev,
 				"Address space collision on device %s %pR\n",
 				z->name, &z->resource);

From c75e59e401b66971d166780c9fad5f741db2c54e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Jul 2020 13:43:14 +0200
Subject: [PATCH 030/262] m68k: Use get_kernel_nofault() in show_registers()

Use the proper get_kernel_nofault() helper to access an unsafe kernel
pointer without faulting instead of playing with set_fs() and
get_user().

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20200720114314.196686-1-hch@lst.de
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/kernel/traps.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 546e81935fe89d..9e1261462bcc52 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -845,7 +845,6 @@ static void show_trace(unsigned long *stack, const char *loglvl)
 void show_registers(struct pt_regs *regs)
 {
 	struct frame *fp = (struct frame *)regs;
-	mm_segment_t old_fs = get_fs();
 	u16 c, *cp;
 	unsigned long addr;
 	int i;
@@ -918,10 +917,9 @@ void show_registers(struct pt_regs *regs)
 	show_stack(NULL, (unsigned long *)addr, KERN_INFO);
 
 	pr_info("Code:");
-	set_fs(KERNEL_DS);
 	cp = (u16 *)regs->pc;
 	for (i = -8; i < 16; i++) {
-		if (get_user(c, cp + i) && i >= 0) {
+		if (get_kernel_nofault(c, cp + i) && i >= 0) {
 			pr_cont(" Bad PC value.");
 			break;
 		}
@@ -930,7 +928,6 @@ void show_registers(struct pt_regs *regs)
 		else
 			pr_cont(" <%04x>", c);
 	}
-	set_fs(old_fs);
 	pr_cont("\n");
 }
 

From 5661bccb70ef134502b9d2e80a19465ad943b022 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Thu, 23 Jul 2020 09:25:51 +1000
Subject: [PATCH 031/262] m68k: Correct some typos in comments

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Link: https://lore.kernel.org/r/f54e99e9bd1e25ad70a6a1d7a7ec9ab2b4e50d68.1595460351.git.fthain@telegraphics.com.au
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/kernel/head.S | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S
index 29de2b3108ea52..493c95db0e512d 100644
--- a/arch/m68k/kernel/head.S
+++ b/arch/m68k/kernel/head.S
@@ -57,7 +57,7 @@
  * Of course, readability is a subjective issue, so it will never be
  * argued that that goal was accomplished.  It was merely a goal.
  * A key way to help make code more readable is to give good
- * documentation.  So, the first thing you will find is exaustive
+ * documentation.  So, the first thing you will find is exhaustive
  * write-ups on the structure of the file, and the features of the
  * functional subroutines.
  *
@@ -1304,7 +1304,7 @@ L(mmu_fixup_done):
  * mmu_engage
  *
  * This chunk of code performs the gruesome task of engaging the MMU.
- * The reason its gruesome is because when the MMU becomes engaged it
+ * The reason it's gruesome is because when the MMU becomes engaged it
  * maps logical addresses to physical addresses.  The Program Counter
  * register is then passed through the MMU before the next instruction
  * is fetched (the instruction following the engage MMU instruction).
@@ -1369,7 +1369,7 @@ L(mmu_fixup_done):
 /*
  * After this point no new memory is allocated and
  * the start of available memory is stored in availmem.
- * (The bootmem allocator requires now the physicall address.)
+ * (The bootmem allocator requires now the physical address.)
  */
 
 	movel	L(memory_start),availmem
@@ -1547,7 +1547,7 @@ func_return	get_bi_record
  *	seven bits of the logical address (LA) are used as an
  *	index into the "root table."  Each entry in the root
  *	table has a bit which specifies if it's a valid pointer to a
- *	pointer table.  Each entry defines a 32KMeg range of memory.
+ *	pointer table.  Each entry defines a 32Meg range of memory.
  *	If an entry is invalid then that logical range of 32M is
  *	invalid and references to that range of memory (when the MMU
  *	is enabled) will fault.  If the entry is valid, then it does
@@ -1584,7 +1584,7 @@ func_return	get_bi_record
  *		bits 17..12 - index into the Page Table
  *		bits 11..0  - offset into a particular 4K page
  *
- *	The algorithms which follows do one thing: they abstract
+ *	The algorithms which follow do one thing: they abstract
  *	the MMU hardware.  For example, there are three kinds of
  *	cache settings that are relevant.  Either, memory is
  *	being mapped in which case it is either Kernel Code (or
@@ -2082,7 +2082,7 @@ func_return	mmu_map_tt
  *	mmu_map
  *
  *	This routine will map a range of memory using a pointer
- *	table and allocating the pages on the fly from the kernel.
+ *	table and allocate the pages on the fly from the kernel.
  *	The pointer table does not have to be already linked into
  *	the root table, this routine will do that if necessary.
  *
@@ -2528,7 +2528,7 @@ func_start	mmu_get_root_table_entry,%d0/%a1
 
 	/* Find the start of free memory, get_bi_record does this for us,
 	 * as the bootinfo structure is located directly behind the kernel
-	 * and and we simply search for the last entry.
+	 * we simply search for the last entry.
 	 */
 	get_bi_record	BI_LAST
 	addw	#PAGESIZE-1,%a0
@@ -2654,7 +2654,7 @@ func_start	mmu_get_page_table_entry,%d0/%a1
 	jne	2f
 
 	/* If the page table entry doesn't exist, we allocate a complete new
-	 * page and use it as one continues big page table which can cover
+	 * page and use it as one continuous big page table which can cover
 	 * 4MB of memory, nearly almost all mappings have that alignment.
 	 */
 	get_new_page

From e48cb1a3fb9165009fe318e1db2fde10d303c1d3 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 24 Aug 2020 12:11:20 -0700
Subject: [PATCH 032/262] x86/resctrl: Enumerate per-thread MBA controls

Some systems support per-thread Memory Bandwidth Allocation (MBA) which
applies a throttling delay value to each hardware thread instead of to
a core. Per-thread MBA is enumerated by CPUID.

No feature flag is shown in /proc/cpuinfo. User applications need to
check a resctrl throttling mode info file to know if the feature is
supported.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Babu Moger <babu.moger@amd.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/1598296281-127595-2-git-send-email-fenghua.yu@intel.com
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/kernel/cpu/cpuid-deps.c   | 1 +
 arch/x86/kernel/cpu/scattered.c    | 1 +
 3 files changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366c0..4a7473ae55ac95 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -288,6 +288,7 @@
 #define X86_FEATURE_FENCE_SWAPGS_USER	(11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT	(11*32+ 6) /* #AC for split lock */
+#define X86_FEATURE_PER_THREAD_MBA	(11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 3cbe24ca80abd9..3e30b26c50ef62 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_CQM_MBM_TOTAL,		X86_FEATURE_CQM_LLC   },
 	{ X86_FEATURE_CQM_MBM_LOCAL,		X86_FEATURE_CQM_LLC   },
 	{ X86_FEATURE_AVX512_BF16,		X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_PER_THREAD_MBA,		X86_FEATURE_MBA       },
 	{}
 };
 
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 62b137c3c97a20..bccfc9ff3cc12b 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -35,6 +35,7 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_CDP_L3,		CPUID_ECX,  2, 0x00000010, 1 },
 	{ X86_FEATURE_CDP_L2,		CPUID_ECX,  2, 0x00000010, 2 },
 	{ X86_FEATURE_MBA,		CPUID_EBX,  3, 0x00000010, 0 },
+	{ X86_FEATURE_PER_THREAD_MBA,	CPUID_ECX,  0, 0x00000010, 3 },
 	{ X86_FEATURE_HW_PSTATE,	CPUID_EDX,  7, 0x80000007, 0 },
 	{ X86_FEATURE_CPB,		CPUID_EDX,  9, 0x80000007, 0 },
 	{ X86_FEATURE_PROC_FEEDBACK,    CPUID_EDX, 11, 0x80000007, 0 },

From 29b6bd41ee24f69a85666b9f68d500b382d408fd Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 24 Aug 2020 12:11:21 -0700
Subject: [PATCH 033/262] x86/resctrl: Enable user to view thread or core
 throttling mode

Early Intel hardware implementations of Memory Bandwidth Allocation (MBA)
could only control bandwidth at the processor core level. This meant that
when two processes with different bandwidth allocations ran simultaneously
on the same core the hardware had to resolve this difference. It did so by
applying the higher throttling value (lower bandwidth) to both processes.

Newer implementations can apply different throttling values to each
thread on a core.

Introduce a new resctrl file, "thread_throttle_mode", on Intel systems
that shows to the user how throttling values are allocated, per-core or
per-thread.

On systems that support per-core throttling, the file will display "max".
On newer systems that support per-thread throttling, the file will display
"per-thread".

AMD confirmed in [1] that AMD bandwidth allocation is already at thread
level but that the AMD implementation does not use a memory delay
throttle mode. So to avoid confusion the thread throttling mode would be
UNDEFINED on AMD systems and the "thread_throttle_mode" file will not be
visible.

Originally-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/1598296281-127595-3-git-send-email-fenghua.yu@intel.com
Link: [1] https://lore.kernel.org/lkml/18d277fd-6523-319c-d560-66b63ff606b8@amd.com
---
 Documentation/x86/resctrl_ui.rst       | 18 ++++++++-
 arch/x86/kernel/cpu/resctrl/core.c     | 11 ++++++
 arch/x86/kernel/cpu/resctrl/internal.h | 30 ++++++++++++---
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 53 +++++++++++++++++++++++++-
 4 files changed, 103 insertions(+), 9 deletions(-)

diff --git a/Documentation/x86/resctrl_ui.rst b/Documentation/x86/resctrl_ui.rst
index 5368cedfb5309d..e59b7b93a9b49b 100644
--- a/Documentation/x86/resctrl_ui.rst
+++ b/Documentation/x86/resctrl_ui.rst
@@ -138,6 +138,18 @@ with respect to allocation:
 		non-linear. This field is purely informational
 		only.
 
+"thread_throttle_mode":
+		Indicator on Intel systems of how tasks running on threads
+		of a physical core are throttled in cases where they
+		request different memory bandwidth percentages:
+
+		"max":
+			the smallest percentage is applied
+			to all threads
+		"per-thread":
+			bandwidth percentages are directly applied to
+			the threads running on the core
+
 If RDT monitoring is available there will be an "L3_MON" directory
 with the following files:
 
@@ -364,8 +376,10 @@ to the next control step available on the hardware.
 
 The bandwidth throttling is a core specific mechanism on some of Intel
 SKUs. Using a high bandwidth and a low bandwidth setting on two threads
-sharing a core will result in both threads being throttled to use the
-low bandwidth. The fact that Memory bandwidth allocation(MBA) is a core
+sharing a core may result in both threads being throttled to use the
+low bandwidth (see "thread_throttle_mode").
+
+The fact that Memory bandwidth allocation(MBA) may be a core
 specific mechanism where as memory bandwidth monitoring(MBM) is done at
 the package level may lead to confusion when users try to apply control
 via the MBA and then monitor the bandwidth to see if the controls are
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 1c00f2f0bf0c23..9e1712e8aef7ad 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -273,6 +273,12 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
 	}
 	r->data_width = 3;
 
+	if (boot_cpu_has(X86_FEATURE_PER_THREAD_MBA))
+		r->membw.throttle_mode = THREAD_THROTTLE_PER_THREAD;
+	else
+		r->membw.throttle_mode = THREAD_THROTTLE_MAX;
+	thread_throttle_mode_init();
+
 	r->alloc_capable = true;
 	r->alloc_enabled = true;
 
@@ -293,6 +299,11 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
 	r->membw.delay_linear = false;
 	r->membw.arch_needs_linear = false;
 
+	/*
+	 * AMD does not use memory delay throttle model to control
+	 * the allocation like Intel does.
+	 */
+	r->membw.throttle_mode = THREAD_THROTTLE_UNDEFINED;
 	r->membw.min_bw = 0;
 	r->membw.bw_gran = 1;
 	/* Max value is 2048, Data width should be 4 in decimal */
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 0b4829484c9e95..80fa997fae60e3 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -371,22 +371,39 @@ struct rdt_cache {
 	bool		arch_has_empty_bitmaps;
 };
 
+/**
+ * enum membw_throttle_mode - System's memory bandwidth throttling mode
+ * @THREAD_THROTTLE_UNDEFINED:	Not relevant to the system
+ * @THREAD_THROTTLE_MAX:	Memory bandwidth is throttled at the core
+ *				always using smallest bandwidth percentage
+ *				assigned to threads, aka "max throttling"
+ * @THREAD_THROTTLE_PER_THREAD:	Memory bandwidth is throttled at the thread
+ */
+enum membw_throttle_mode {
+	THREAD_THROTTLE_UNDEFINED = 0,
+	THREAD_THROTTLE_MAX,
+	THREAD_THROTTLE_PER_THREAD,
+};
+
 /**
  * struct rdt_membw - Memory bandwidth allocation related data
  * @min_bw:		Minimum memory bandwidth percentage user can request
  * @bw_gran:		Granularity at which the memory bandwidth is allocated
  * @delay_linear:	True if memory B/W delay is in linear scale
  * @arch_needs_linear:	True if we can't configure non-linear resources
+ * @throttle_mode:	Bandwidth throttling mode when threads request
+ *			different memory bandwidths
  * @mba_sc:		True if MBA software controller(mba_sc) is enabled
  * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
  */
 struct rdt_membw {
-	u32		min_bw;
-	u32		bw_gran;
-	u32		delay_linear;
-	bool		arch_needs_linear;
-	bool		mba_sc;
-	u32		*mb_map;
+	u32				min_bw;
+	u32				bw_gran;
+	u32				delay_linear;
+	bool				arch_needs_linear;
+	enum membw_throttle_mode	throttle_mode;
+	bool				mba_sc;
+	u32				*mb_map;
 };
 
 static inline bool is_llc_occupancy_enabled(void)
@@ -607,5 +624,6 @@ void cqm_handle_limbo(struct work_struct *work);
 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
 void __check_limbo(struct rdt_domain *d, bool force_free);
 void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
+void __init thread_throttle_mode_init(void);
 
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 78f3be10e215e9..b494187632b2b1 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1027,6 +1027,19 @@ static int max_threshold_occ_show(struct kernfs_open_file *of,
 	return 0;
 }
 
+static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
+					 struct seq_file *seq, void *v)
+{
+	struct rdt_resource *r = of->kn->parent->priv;
+
+	if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
+		seq_puts(seq, "per-thread\n");
+	else
+		seq_puts(seq, "max\n");
+
+	return 0;
+}
+
 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
 				       char *buf, size_t nbytes, loff_t off)
 {
@@ -1523,6 +1536,17 @@ static struct rftype res_common_files[] = {
 		.seq_show	= rdt_delay_linear_show,
 		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
 	},
+	/*
+	 * Platform specific which (if any) capabilities are provided by
+	 * thread_throttle_mode. Defer "fflags" initialization to platform
+	 * discovery.
+	 */
+	{
+		.name		= "thread_throttle_mode",
+		.mode		= 0444,
+		.kf_ops		= &rdtgroup_kf_single_ops,
+		.seq_show	= rdt_thread_throttle_mode_show,
+	},
 	{
 		.name		= "max_threshold_occupancy",
 		.mode		= 0644,
@@ -1593,7 +1617,7 @@ static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
 	lockdep_assert_held(&rdtgroup_mutex);
 
 	for (rft = rfts; rft < rfts + len; rft++) {
-		if ((fflags & rft->fflags) == rft->fflags) {
+		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
 			ret = rdtgroup_add_file(kn, rft);
 			if (ret)
 				goto error;
@@ -1610,6 +1634,33 @@ static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
 	return ret;
 }
 
+static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
+{
+	struct rftype *rfts, *rft;
+	int len;
+
+	rfts = res_common_files;
+	len = ARRAY_SIZE(res_common_files);
+
+	for (rft = rfts; rft < rfts + len; rft++) {
+		if (!strcmp(rft->name, name))
+			return rft;
+	}
+
+	return NULL;
+}
+
+void __init thread_throttle_mode_init(void)
+{
+	struct rftype *rft;
+
+	rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
+	if (!rft)
+		return;
+
+	rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB;
+}
+
 /**
  * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
  * @r: The resource group with which the file is associated.

From 1e36d9c6886849c6f3d3c836370563e6bc1a6ddd Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 24 Aug 2020 15:12:37 -0700
Subject: [PATCH 034/262] x86/mce: Delay clearing IA32_MCG_STATUS to the end of
 do_machine_check()

A long time ago, Linux cleared IA32_MCG_STATUS at the very end of machine
check processing.

Then, some fancy recovery and IST manipulation was added in:

  d4812e169de4 ("x86, mce: Get rid of TIF_MCE_NOTIFY and associated mce tricks")

and clearing IA32_MCG_STATUS was pulled earlier in the function.

Next change moved the actual recovery out of do_machine_check() and
just used task_work_add() to schedule it later (before returning to the
user):

  5567d11c21a1 ("x86/mce: Send #MC singal from task work")

Most recently the fancy IST footwork was removed as no longer needed:

  b052df3da821 ("x86/entry: Get rid of ist_begin/end_non_atomic()")

At this point there is no reason remaining to clear IA32_MCG_STATUS early.
It can move back to the very end of the function.

Also move sync_core(). The comments for this function say that it should
only be called when instructions have been changed/re-mapped. Recovery
for an instruction fetch may change the physical address. But that
doesn't happen until the scheduled work runs (which could be on another
CPU).

 [ bp: Massage commit message. ]

Reported-by: Gabriele Paoloni <gabriele.paoloni@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200824221237.5397-1-tony.luck@intel.com
---
 arch/x86/kernel/cpu/mce/core.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index f43a78bde670b8..0ba24dfffdb278 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1190,6 +1190,7 @@ static void kill_me_maybe(struct callback_head *cb)
 
 	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
 		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
+		sync_core();
 		return;
 	}
 
@@ -1330,12 +1331,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
 	if (worst > 0)
 		irq_work_queue(&mce_irq_work);
 
-	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
-
-	sync_core();
-
 	if (worst != MCE_AR_SEVERITY && !kill_it)
-		return;
+		goto out;
 
 	/* Fault was in user mode and we need to take some action */
 	if ((m.cs & 3) == 3) {
@@ -1364,6 +1361,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
 				mce_panic("Failed kernel mode recovery", &m, msg);
 		}
 	}
+out:
+	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
 

From ab2dd173330a3f07142e68cd65682205036cd00f Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 26 Aug 2020 10:00:45 -0700
Subject: [PATCH 035/262] selftests/x86/fsgsbase: Reap a forgotten child

The ptrace() test forgot to reap its child.  Reap it.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/e7700a503f30e79ab35a63103938a19893dbeff2.1598461151.git.luto@kernel.org
---
 tools/testing/selftests/x86/fsgsbase.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 99831955352372..0056e2597f53a0 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -517,6 +517,9 @@ static void test_ptrace_write_gsbase(void)
 
 END:
 	ptrace(PTRACE_CONT, child, NULL, NULL);
+	wait(&status);
+	if (!WIFEXITED(status))
+		printf("[WARN]\tChild didn't exit cleanly.\n");
 }
 
 int main()

From 1b9abd1755ad947d7c9913e92e7837b533124c90 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 26 Aug 2020 10:00:46 -0700
Subject: [PATCH 036/262] selftests/x86/fsgsbase: Test PTRACE_PEEKUSER for
 GSBASE with invalid LDT GS

This tests commit:

  8ab49526b53d ("x86/fsgsbase/64: Fix NULL deref in 86_fsgsbase_read_task")

Unpatched kernels will OOPS.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/c618ae86d1f757e01b1a8e79869f553cb88acf9a.1598461151.git.luto@kernel.org
---
 tools/testing/selftests/x86/fsgsbase.c | 65 ++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 0056e2597f53a0..7161cfc2e60b4e 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -443,6 +443,68 @@ static void test_unexpected_base(void)
 
 #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
 
+static void test_ptrace_write_gs_read_base(void)
+{
+	int status;
+	pid_t child = fork();
+
+	if (child < 0)
+		err(1, "fork");
+
+	if (child == 0) {
+		printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
+
+		printf("[RUN]\tARCH_SET_GS to 1\n");
+		if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
+			err(1, "ARCH_SET_GS");
+
+		if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		raise(SIGTRAP);
+		_exit(0);
+	}
+
+	wait(&status);
+
+	if (WSTOPSIG(status) == SIGTRAP) {
+		unsigned long base;
+		unsigned long gs_offset = USER_REGS_OFFSET(gs);
+		unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+		/* Read the initial base.  It should be 1. */
+		base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+		if (base == 1) {
+			printf("[OK]\tGSBASE started at 1\n");
+		} else {
+			nerrs++;
+			printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
+		}
+
+		printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
+
+		/* Poke an LDT selector into GS. */
+		if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
+			err(1, "PTRACE_POKEUSER");
+
+		/* And read the base. */
+		base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+		if (base == 0 || base == 1) {
+			printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
+		} else {
+			nerrs++;
+			printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
+		}
+	}
+
+	ptrace(PTRACE_CONT, child, NULL, NULL);
+
+	wait(&status);
+	if (!WIFEXITED(status))
+		printf("[WARN]\tChild didn't exit cleanly.\n");
+}
+
 static void test_ptrace_write_gsbase(void)
 {
 	int status;
@@ -529,6 +591,9 @@ int main()
 	shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 			      MAP_ANONYMOUS | MAP_SHARED, -1, 0);
 
+	/* Do these tests before we have an LDT. */
+	test_ptrace_write_gs_read_base();
+
 	/* Probe FSGSBASE */
 	sethandler(SIGILL, sigill, 0);
 	if (sigsetjmp(jmpbuf, 1) == 0) {

From e33ab2064836600603289ad2ed0ca3424f395fa8 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Wed, 19 Aug 2020 19:29:10 +0800
Subject: [PATCH 037/262] x86/mpparse: Remove duplicate io_apic.h include

Remove asm/io_apic.h which is included more than once.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Link: https://lkml.kernel.org/r/20200819112910.7629-1-wanghai38@huawei.com
---
 arch/x86/kernel/mpparse.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 411af4aa7b51f3..00e51626eeb033 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -24,7 +24,6 @@
 #include <asm/irqdomain.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
-#include <asm/io_apic.h>
 #include <asm/proto.h>
 #include <asm/bios_ebda.h>
 #include <asm/e820/api.h>

From 18ec63faefb3fd311822556cd9b949f66b1eecee Mon Sep 17 00:00:00 2001
From: Kyung Min Park <kyung.min.park@intel.com>
Date: Tue, 25 Aug 2020 08:47:57 +0800
Subject: [PATCH 038/262] x86/cpufeatures: Enumerate TSX suspend load address
 tracking instructions

Intel TSX suspend load tracking instructions aim to give a way to choose
which memory accesses do not need to be tracked in the TSX read set. Add
TSX suspend load tracking CPUID feature flag TSXLDTRK for enumeration.

A processor supports Intel TSX suspend load address tracking if
CPUID.0x07.0x0:EDX[16] is present. Two instructions XSUSLDTRK, XRESLDTRK
are available when this feature is present.

The CPU feature flag is shown as "tsxldtrk" in /proc/cpuinfo.

Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1598316478-23337-2-git-send-email-cathy.zhang@intel.com
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366c0..83fc9d38eb1fca 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -368,6 +368,7 @@
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE		(18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */

From 61aa9a0a5eae2100c171698bffabde8d5e9f694d Mon Sep 17 00:00:00 2001
From: Cathy Zhang <cathy.zhang@intel.com>
Date: Tue, 25 Aug 2020 08:47:58 +0800
Subject: [PATCH 039/262] x86/kvm: Expose TSX Suspend Load Tracking feature

TSX suspend load tracking instruction is supported by the Intel uarch
Sapphire Rapids. It aims to give a way to choose which memory accesses
do not need to be tracked in the TSX read set. It's availability is
indicated as CPUID.(EAX=7,ECX=0):EDX[bit 16].

Expose TSX Suspend Load Address Tracking feature in KVM CPUID, so KVM
could pass this information to guests and they can make use of this
feature accordingly.

Signed-off-by: Cathy Zhang <cathy.zhang@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1598316478-23337-3-git-send-email-cathy.zhang@intel.com
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 3fd6eec202d7a6..7456f9ad424b4a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -371,7 +371,7 @@ void kvm_set_cpu_caps(void)
 		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
 		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
 		F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
-		F(SERIALIZE)
+		F(SERIALIZE) | F(TSXLDTRK)
 	);
 
 	/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */

From ea4e3bef4c94d5b5d8e790f37b48b7641172bcf5 Mon Sep 17 00:00:00 2001
From: Kyung Min Park <kyung.min.park@intel.com>
Date: Mon, 31 Aug 2020 11:35:00 -0700
Subject: [PATCH 040/262] Documentation/x86: Add documentation for
 /proc/cpuinfo feature flags

/proc/cpuinfo shows features which the kernel supports. Some of these
flags are derived from CPUID, and others are derived from other sources,
including some that are entirely software-based. Currently, there is
not any documentation in the kernel about how /proc/cpuinfo flags are
generated and what it means when they are missing.

Add documentation for /proc/cpuinfo feature flags enumeration.
Document how and when x86 feature flags are used. Also discuss what
their presence or absence mean for the kernel and users.

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Co-developed-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20200831183500.15481-1-kyung.min.park@intel.com
---
 Documentation/x86/cpuinfo.rst | 155 ++++++++++++++++++++++++++++++++++
 Documentation/x86/index.rst   |   1 +
 2 files changed, 156 insertions(+)
 create mode 100644 Documentation/x86/cpuinfo.rst

diff --git a/Documentation/x86/cpuinfo.rst b/Documentation/x86/cpuinfo.rst
new file mode 100644
index 00000000000000..5d54c39a063ff2
--- /dev/null
+++ b/Documentation/x86/cpuinfo.rst
@@ -0,0 +1,155 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+x86 Feature Flags
+=================
+
+Introduction
+============
+
+On x86, flags appearing in /proc/cpuinfo have an X86_FEATURE definition
+in arch/x86/include/asm/cpufeatures.h. If the kernel cares about a feature
+or KVM want to expose the feature to a KVM guest, it can and should have
+an X86_FEATURE_* defined. These flags represent hardware features as
+well as software features.
+
+If users want to know if a feature is available on a given system, they
+try to find the flag in /proc/cpuinfo. If a given flag is present, it
+means that the kernel supports it and is currently making it available.
+If such flag represents a hardware feature, it also means that the
+hardware supports it.
+
+If the expected flag does not appear in /proc/cpuinfo, things are murkier.
+Users need to find out the reason why the flag is missing and find the way
+how to enable it, which is not always easy. There are several factors that
+can explain missing flags: the expected feature failed to enable, the feature
+is missing in hardware, platform firmware did not enable it, the feature is
+disabled at build or run time, an old kernel is in use, or the kernel does
+not support the feature and thus has not enabled it. In general, /proc/cpuinfo
+shows features which the kernel supports. For a full list of CPUID flags
+which the CPU supports, use tools/arch/x86/kcpuid.
+
+How are feature flags created?
+==============================
+
+a: Feature flags can be derived from the contents of CPUID leaves.
+------------------------------------------------------------------
+These feature definitions are organized mirroring the layout of CPUID
+leaves and grouped in words with offsets as mapped in enum cpuid_leafs
+in cpufeatures.h (see arch/x86/include/asm/cpufeatures.h for details).
+If a feature is defined with a X86_FEATURE_<name> definition in
+cpufeatures.h, and if it is detected at run time, the flags will be
+displayed accordingly in /proc/cpuinfo. For example, the flag "avx2"
+comes from X86_FEATURE_AVX2 in cpufeatures.h.
+
+b: Flags can be from scattered CPUID-based features.
+----------------------------------------------------
+Hardware features enumerated in sparsely populated CPUID leaves get
+software-defined values. Still, CPUID needs to be queried to determine
+if a given feature is present. This is done in init_scattered_cpuid_features().
+For instance, X86_FEATURE_CQM_LLC is defined as 11*32 + 0 and its presence is
+checked at runtime in the respective CPUID leaf [EAX=f, ECX=0] bit EDX[1].
+
+The intent of scattering CPUID leaves is to not bloat struct
+cpuinfo_x86.x86_capability[] unnecessarily. For instance, the CPUID leaf
+[EAX=7, ECX=0] has 30 features and is dense, but the CPUID leaf [EAX=7, EAX=1]
+has only one feature and would waste 31 bits of space in the x86_capability[]
+array. Since there is a struct cpuinfo_x86 for each possible CPU, the wasted
+memory is not trivial.
+
+c: Flags can be created synthetically under certain conditions for hardware features.
+-------------------------------------------------------------------------------------
+Examples of conditions include whether certain features are present in
+MSR_IA32_CORE_CAPS or specific CPU models are identified. If the needed
+conditions are met, the features are enabled by the set_cpu_cap or
+setup_force_cpu_cap macros. For example, if bit 5 is set in MSR_IA32_CORE_CAPS,
+the feature X86_FEATURE_SPLIT_LOCK_DETECT will be enabled and
+"split_lock_detect" will be displayed. The flag "ring3mwait" will be
+displayed only when running on INTEL_FAM6_XEON_PHI_[KNL|KNM] processors.
+
+d: Flags can represent purely software features.
+------------------------------------------------
+These flags do not represent hardware features. Instead, they represent a
+software feature implemented in the kernel. For example, Kernel Page Table
+Isolation is purely software feature and its feature flag X86_FEATURE_PTI is
+also defined in cpufeatures.h.
+
+Naming of Flags
+===============
+
+The script arch/x86/kernel/cpu/mkcapflags.sh processes the
+#define X86_FEATURE_<name> from cpufeatures.h and generates the
+x86_cap/bug_flags[] arrays in kernel/cpu/capflags.c. The names in the
+resulting x86_cap/bug_flags[] are used to populate /proc/cpuinfo. The naming
+of flags in the x86_cap/bug_flags[] are as follows:
+
+a: The name of the flag is from the string in X86_FEATURE_<name> by default.
+----------------------------------------------------------------------------
+By default, the flag <name> in /proc/cpuinfo is extracted from the respective
+X86_FEATURE_<name> in cpufeatures.h. For example, the flag "avx2" is from
+X86_FEATURE_AVX2.
+
+b: The naming can be overridden.
+--------------------------------
+If the comment on the line for the #define X86_FEATURE_* starts with a
+double-quote character (""), the string inside the double-quote characters
+will be the name of the flags. For example, the flag "sse4_1" comes from
+the comment "sse4_1" following the X86_FEATURE_XMM4_1 definition.
+
+There are situations in which overriding the displayed name of the flag is
+needed. For instance, /proc/cpuinfo is a userspace interface and must remain
+constant. If, for some reason, the naming of X86_FEATURE_<name> changes, one
+shall override the new naming with the name already used in /proc/cpuinfo.
+
+c: The naming override can be "", which means it will not appear in /proc/cpuinfo.
+----------------------------------------------------------------------------------
+The feature shall be omitted from /proc/cpuinfo if it does not make sense for
+the feature to be exposed to userspace. For example, X86_FEATURE_ALWAYS is
+defined in cpufeatures.h but that flag is an internal kernel feature used
+in the alternative runtime patching functionality. So, its name is overridden
+with "". Its flag will not appear in /proc/cpuinfo.
+
+Flags are missing when one or more of these happen
+==================================================
+
+a: The hardware does not enumerate support for it.
+--------------------------------------------------
+For example, when a new kernel is running on old hardware or the feature is
+not enabled by boot firmware. Even if the hardware is new, there might be a
+problem enabling the feature at run time, the flag will not be displayed.
+
+b: The kernel does not know about the flag.
+-------------------------------------------
+For example, when an old kernel is running on new hardware.
+
+c: The kernel disabled support for it at compile-time.
+------------------------------------------------------
+For example, if 5-level-paging is not enabled when building (i.e.,
+CONFIG_X86_5LEVEL is not selected) the flag "la57" will not show up [#f1]_.
+Even though the feature will still be detected via CPUID, the kernel disables
+it by clearing via setup_clear_cpu_cap(X86_FEATURE_LA57).
+
+d: The feature is disabled at boot-time.
+----------------------------------------
+A feature can be disabled either using a command-line parameter or because
+it failed to be enabled. The command-line parameter clearcpuid= can be used
+to disable features using the feature number as defined in
+/arch/x86/include/asm/cpufeatures.h. For instance, User Mode Instruction
+Protection can be disabled using clearcpuid=514. The number 514 is calculated
+from #define X86_FEATURE_UMIP (16*32 + 2).
+
+In addition, there exists a variety of custom command-line parameters that
+disable specific features. The list of parameters includes, but is not limited
+to, nofsgsbase, nosmap, and nosmep. 5-level paging can also be disabled using
+"no5lvl". SMAP and SMEP are disabled with the aforementioned parameters,
+respectively.
+
+e: The feature was known to be non-functional.
+----------------------------------------------
+The feature was known to be non-functional because a dependency was
+missing at runtime. For example, AVX flags will not show up if XSAVE feature
+is disabled since they depend on XSAVE feature. Another example would be broken
+CPUs and them missing microcode patches. Due to that, the kernel decides not to
+enable a feature.
+
+.. [#f1] 5-level paging uses linear address of 57 bits.
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 265d9e9a093b8d..d5adb0ab8668bc 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -9,6 +9,7 @@ x86-specific Documentation
    :numbered:
 
    boot
+   cpuinfo
    topology
    exception-tables
    kernel-stacks

From 4a17e8513376bb23f814d3e340a5692a12c69369 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 31 Aug 2020 09:38:58 -0700
Subject: [PATCH 041/262] microblaze: fix kbuild redundant file warning

Fix build warning since this file is already listed in
include/asm-generic/Kbuild.

../scripts/Makefile.asm-generic:25: redundant generic-y found in arch/microblaze/include/asm/Kbuild: hw_irq.h

Fixes: 630f289b7114 ("asm-generic: make more kernel-space headers mandatory")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/4d992aee-8a69-1769-e622-8d6d6e316346@infradead.org
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 arch/microblaze/include/asm/Kbuild | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 2e87a9b6d312fe..63bce836b9f10f 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += extable.h
-generic-y += hw_irq.h
 generic-y += kvm_para.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h

From 857a3139bd8be4f702c030c8ca06f3fd69c1741a Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Wed, 26 Aug 2020 20:14:37 +0800
Subject: [PATCH 042/262] EDAC/i5100: Fix error handling order in
 i5100_init_one()

When pci_get_device_func() fails, the driver doesn't need to execute
pci_dev_put(). mci should still be freed, though, to prevent a memory
leak. When pci_enable_device() fails, the error injection PCI device
"einj" doesn't need to be disabled either.

 [ bp: Massage commit message, rename label to "bail_mc_free". ]

Fixes: 52608ba205461 ("i5100_edac: probe for device 19 function 0")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200826121437.31606-1-dinghao.liu@zju.edu.cn
---
 drivers/edac/i5100_edac.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 191aa7c19ded70..324a46b8479b0f 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -1061,16 +1061,15 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 				    PCI_DEVICE_ID_INTEL_5100_19, 0);
 	if (!einj) {
 		ret = -ENODEV;
-		goto bail_einj;
+		goto bail_mc_free;
 	}
 
 	rc = pci_enable_device(einj);
 	if (rc < 0) {
 		ret = rc;
-		goto bail_disable_einj;
+		goto bail_einj;
 	}
 
-
 	mci->pdev = &pdev->dev;
 
 	priv = mci->pvt_info;
@@ -1136,14 +1135,14 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 bail_scrub:
 	priv->scrub_enable = 0;
 	cancel_delayed_work_sync(&(priv->i5100_scrubbing));
-	edac_mc_free(mci);
-
-bail_disable_einj:
 	pci_disable_device(einj);
 
 bail_einj:
 	pci_dev_put(einj);
 
+bail_mc_free:
+	edac_mc_free(mci);
+
 bail_disable_ch1:
 	pci_disable_device(ch1mm);
 

From 0cbc2659123e6946ba926c5ec3871efe310123e8 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 22 Jun 2020 13:28:34 +0100
Subject: [PATCH 043/262] arm64: vdso32: Remove a bunch of #ifdef
 CONFIG_COMPAT_VDSO guards

Most of the compat vDSO code can be built and guarded using IS_ENABLED,
so drop the unnecessary #ifdefs.

Reviewed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/vdso.c | 44 ++++++++++++++++------------------------
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index d4202a32abc9ce..1501b22a3cf66f 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -30,15 +30,11 @@
 #include <asm/vdso.h>
 
 extern char vdso_start[], vdso_end[];
-#ifdef CONFIG_COMPAT_VDSO
 extern char vdso32_start[], vdso32_end[];
-#endif /* CONFIG_COMPAT_VDSO */
 
 enum vdso_abi {
 	VDSO_ABI_AA64,
-#ifdef CONFIG_COMPAT_VDSO
 	VDSO_ABI_AA32,
-#endif /* CONFIG_COMPAT_VDSO */
 };
 
 enum vvar_pages {
@@ -284,21 +280,17 @@ static int __setup_additional_pages(enum vdso_abi abi,
 /*
  * Create and map the vectors page for AArch32 tasks.
  */
-#ifdef CONFIG_COMPAT_VDSO
 static int aarch32_vdso_mremap(const struct vm_special_mapping *sm,
 		struct vm_area_struct *new_vma)
 {
 	return __vdso_remap(VDSO_ABI_AA32, sm, new_vma);
 }
-#endif /* CONFIG_COMPAT_VDSO */
 
 enum aarch32_map {
 	AA32_MAP_VECTORS, /* kuser helpers */
-#ifdef CONFIG_COMPAT_VDSO
+	AA32_MAP_SIGPAGE,
 	AA32_MAP_VVAR,
 	AA32_MAP_VDSO,
-#endif
-	AA32_MAP_SIGPAGE
 };
 
 static struct page *aarch32_vectors_page __ro_after_init;
@@ -309,7 +301,10 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
 		.name	= "[vectors]", /* ABI */
 		.pages	= &aarch32_vectors_page,
 	},
-#ifdef CONFIG_COMPAT_VDSO
+	[AA32_MAP_SIGPAGE] = {
+		.name	= "[sigpage]", /* ABI */
+		.pages	= &aarch32_sig_page,
+	},
 	[AA32_MAP_VVAR] = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
@@ -319,11 +314,6 @@ static struct vm_special_mapping aarch32_vdso_maps[] = {
 		.name = "[vdso]",
 		.mremap = aarch32_vdso_mremap,
 	},
-#endif /* CONFIG_COMPAT_VDSO */
-	[AA32_MAP_SIGPAGE] = {
-		.name	= "[sigpage]", /* ABI */
-		.pages	= &aarch32_sig_page,
-	},
 };
 
 static int aarch32_alloc_kuser_vdso_page(void)
@@ -362,25 +352,25 @@ static int aarch32_alloc_sigpage(void)
 	return 0;
 }
 
-#ifdef CONFIG_COMPAT_VDSO
 static int __aarch32_alloc_vdso_pages(void)
 {
+
+	if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
+		return 0;
+
 	vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
 	vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];
 
 	return __vdso_init(VDSO_ABI_AA32);
 }
-#endif /* CONFIG_COMPAT_VDSO */
 
 static int __init aarch32_alloc_vdso_pages(void)
 {
 	int ret;
 
-#ifdef CONFIG_COMPAT_VDSO
 	ret = __aarch32_alloc_vdso_pages();
 	if (ret)
 		return ret;
-#endif
 
 	ret = aarch32_alloc_sigpage();
 	if (ret)
@@ -449,14 +439,14 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (ret)
 		goto out;
 
-#ifdef CONFIG_COMPAT_VDSO
-	ret = __setup_additional_pages(VDSO_ABI_AA32,
-				       mm,
-				       bprm,
-				       uses_interp);
-	if (ret)
-		goto out;
-#endif /* CONFIG_COMPAT_VDSO */
+	if (IS_ENABLED(CONFIG_COMPAT_VDSO)) {
+		ret = __setup_additional_pages(VDSO_ABI_AA32,
+					       mm,
+					       bprm,
+					       uses_interp);
+		if (ret)
+			goto out;
+	}
 
 	ret = aarch32_sigreturn_setup(mm);
 out:

From 2a30aca81a72d71e128eb692cc4755f33e317670 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 22 Jun 2020 13:37:09 +0100
Subject: [PATCH 044/262] arm64: vdso: Fix unusual formatting in
 *setup_additional_pages()

There's really no need to put every parameter on a new line when calling
a function with a long name, so reformat the *setup_additional_pages()
functions in the vDSO setup code to follow the usual conventions.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/vdso.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 1501b22a3cf66f..debb8995d57fbe 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -440,9 +440,7 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		goto out;
 
 	if (IS_ENABLED(CONFIG_COMPAT_VDSO)) {
-		ret = __setup_additional_pages(VDSO_ABI_AA32,
-					       mm,
-					       bprm,
+		ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm,
 					       uses_interp);
 		if (ret)
 			goto out;
@@ -487,8 +485,7 @@ static int __init vdso_init(void)
 }
 arch_initcall(vdso_init);
 
-int arch_setup_additional_pages(struct linux_binprm *bprm,
-				int uses_interp)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
 	int ret;
@@ -496,11 +493,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 
-	ret = __setup_additional_pages(VDSO_ABI_AA64,
-				       mm,
-				       bprm,
-				       uses_interp);
-
+	ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp);
 	mmap_write_unlock(mm);
 
 	return ret;

From afce6996943be265fa39240b67025cfcb1bcdfb1 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Thu, 27 Aug 2020 09:07:42 +0200
Subject: [PATCH 045/262] EDAC/aspeed: Fix handling of platform_get_irq() error

platform_get_irq() returns a negative error number on error. In such a
case, comparison to 0 would pass the check therefore check the return
value properly, whether it is negative.

 [ bp: Massage commit message. ]

Fixes: 9b7e6242ee4e ("EDAC, aspeed: Add an Aspeed AST2500 EDAC driver")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Stefan Schaeckeler <schaecsn@gmx.net>
Link: https://lkml.kernel.org/r/20200827070743.26628-1-krzk@kernel.org
---
 drivers/edac/aspeed_edac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c
index b194658b8b5c9f..fbec28dc661d79 100644
--- a/drivers/edac/aspeed_edac.c
+++ b/drivers/edac/aspeed_edac.c
@@ -209,8 +209,8 @@ static int config_irq(void *ctx, struct platform_device *pdev)
 	/* register interrupt handler */
 	irq = platform_get_irq(pdev, 0);
 	dev_dbg(&pdev->dev, "got irq %d\n", irq);
-	if (!irq)
-		return -ENODEV;
+	if (irq < 0)
+		return irq;
 
 	rc = devm_request_irq(&pdev->dev, irq, mcr_isr, IRQF_TRIGGER_HIGH,
 			      DRV_NAME, ctx);

From 66077adb70a2a9e92540155b2ace33ec98299c90 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Thu, 27 Aug 2020 09:07:43 +0200
Subject: [PATCH 046/262] EDAC/ti: Fix handling of platform_get_irq() error

platform_get_irq() returns a negative error number on error. In such a
case, comparison to 0 would pass the check therefore check the return
value properly, whether it is negative.

 [ bp: Massage commit message. ]

Fixes: 86a18ee21e5e ("EDAC, ti: Add support for TI keystone and DRA7xx EDAC")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tero Kristo <t-kristo@ti.com>
Link: https://lkml.kernel.org/r/20200827070743.26628-2-krzk@kernel.org
---
 drivers/edac/ti_edac.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/ti_edac.c b/drivers/edac/ti_edac.c
index 6e52796a0b4147..e7eae20f83d1d0 100644
--- a/drivers/edac/ti_edac.c
+++ b/drivers/edac/ti_edac.c
@@ -278,7 +278,8 @@ static int ti_edac_probe(struct platform_device *pdev)
 
 	/* add EMIF ECC error handler */
 	error_irq = platform_get_irq(pdev, 0);
-	if (!error_irq) {
+	if (error_irq < 0) {
+		ret = error_irq;
 		edac_printk(KERN_ERR, EDAC_MOD_NAME,
 			    "EMIF irq number not defined.\n");
 		goto err;

From c058b1c4a5ea7b88cce4c961c1000acf482ea64b Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Fri, 6 Sep 2019 10:55:29 +0100
Subject: [PATCH 047/262] arm64: mte: system register definitions

Add Memory Tagging Extension system register definitions together with
the relevant bitfields.

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_arm.h     |  1 +
 arch/arm64/include/asm/sysreg.h      | 53 ++++++++++++++++++++++++++++
 arch/arm64/include/uapi/asm/ptrace.h |  1 +
 arch/arm64/kernel/ptrace.c           |  2 +-
 4 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 1da8e3dc445551..1593669cca4d4a 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -12,6 +12,7 @@
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
+#define HCR_ATA		(UL(1) << 56)
 #define HCR_FWB		(UL(1) << 46)
 #define HCR_API		(UL(1) << 41)
 #define HCR_APK		(UL(1) << 40)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 554a7e8ecb0746..6fa9aa477e76c4 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -91,10 +91,12 @@
 #define PSTATE_PAN			pstate_field(0, 4)
 #define PSTATE_UAO			pstate_field(0, 3)
 #define PSTATE_SSBS			pstate_field(3, 1)
+#define PSTATE_TCO			pstate_field(3, 4)
 
 #define SET_PSTATE_PAN(x)		__emit_inst(0xd500401f | PSTATE_PAN | ((!!x) << PSTATE_Imm_shift))
 #define SET_PSTATE_UAO(x)		__emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift))
 #define SET_PSTATE_SSBS(x)		__emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift))
+#define SET_PSTATE_TCO(x)		__emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift))
 
 #define __SYS_BARRIER_INSN(CRm, op2, Rt) \
 	__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
@@ -181,6 +183,8 @@
 #define SYS_SCTLR_EL1			sys_reg(3, 0, 1, 0, 0)
 #define SYS_ACTLR_EL1			sys_reg(3, 0, 1, 0, 1)
 #define SYS_CPACR_EL1			sys_reg(3, 0, 1, 0, 2)
+#define SYS_RGSR_EL1			sys_reg(3, 0, 1, 0, 5)
+#define SYS_GCR_EL1			sys_reg(3, 0, 1, 0, 6)
 
 #define SYS_ZCR_EL1			sys_reg(3, 0, 1, 2, 0)
 
@@ -218,6 +222,8 @@
 #define SYS_ERXADDR_EL1			sys_reg(3, 0, 5, 4, 3)
 #define SYS_ERXMISC0_EL1		sys_reg(3, 0, 5, 5, 0)
 #define SYS_ERXMISC1_EL1		sys_reg(3, 0, 5, 5, 1)
+#define SYS_TFSR_EL1			sys_reg(3, 0, 5, 6, 0)
+#define SYS_TFSRE0_EL1			sys_reg(3, 0, 5, 6, 1)
 
 #define SYS_FAR_EL1			sys_reg(3, 0, 6, 0, 0)
 #define SYS_PAR_EL1			sys_reg(3, 0, 7, 4, 0)
@@ -368,6 +374,7 @@
 
 #define SYS_CCSIDR_EL1			sys_reg(3, 1, 0, 0, 0)
 #define SYS_CLIDR_EL1			sys_reg(3, 1, 0, 0, 1)
+#define SYS_GMID_EL1			sys_reg(3, 1, 0, 0, 4)
 #define SYS_AIDR_EL1			sys_reg(3, 1, 0, 0, 7)
 
 #define SYS_CSSELR_EL1			sys_reg(3, 2, 0, 0, 0)
@@ -460,6 +467,7 @@
 #define SYS_ESR_EL2			sys_reg(3, 4, 5, 2, 0)
 #define SYS_VSESR_EL2			sys_reg(3, 4, 5, 2, 3)
 #define SYS_FPEXC32_EL2			sys_reg(3, 4, 5, 3, 0)
+#define SYS_TFSR_EL2			sys_reg(3, 4, 5, 6, 0)
 #define SYS_FAR_EL2			sys_reg(3, 4, 6, 0, 0)
 
 #define SYS_VDISR_EL2			sys_reg(3, 4, 12, 1,  1)
@@ -516,6 +524,7 @@
 #define SYS_AFSR0_EL12			sys_reg(3, 5, 5, 1, 0)
 #define SYS_AFSR1_EL12			sys_reg(3, 5, 5, 1, 1)
 #define SYS_ESR_EL12			sys_reg(3, 5, 5, 2, 0)
+#define SYS_TFSR_EL12			sys_reg(3, 5, 5, 6, 0)
 #define SYS_FAR_EL12			sys_reg(3, 5, 6, 0, 0)
 #define SYS_MAIR_EL12			sys_reg(3, 5, 10, 2, 0)
 #define SYS_AMAIR_EL12			sys_reg(3, 5, 10, 3, 0)
@@ -531,6 +540,15 @@
 
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_DSSBS	(BIT(44))
+#define SCTLR_ELx_ATA	(BIT(43))
+
+#define SCTLR_ELx_TCF_SHIFT	40
+#define SCTLR_ELx_TCF_NONE	(UL(0x0) << SCTLR_ELx_TCF_SHIFT)
+#define SCTLR_ELx_TCF_SYNC	(UL(0x1) << SCTLR_ELx_TCF_SHIFT)
+#define SCTLR_ELx_TCF_ASYNC	(UL(0x2) << SCTLR_ELx_TCF_SHIFT)
+#define SCTLR_ELx_TCF_MASK	(UL(0x3) << SCTLR_ELx_TCF_SHIFT)
+
+#define SCTLR_ELx_ITFSB	(BIT(37))
 #define SCTLR_ELx_ENIA	(BIT(31))
 #define SCTLR_ELx_ENIB	(BIT(30))
 #define SCTLR_ELx_ENDA	(BIT(27))
@@ -559,6 +577,14 @@
 #endif
 
 /* SCTLR_EL1 specific flags. */
+#define SCTLR_EL1_ATA0		(BIT(42))
+
+#define SCTLR_EL1_TCF0_SHIFT	38
+#define SCTLR_EL1_TCF0_NONE	(UL(0x0) << SCTLR_EL1_TCF0_SHIFT)
+#define SCTLR_EL1_TCF0_SYNC	(UL(0x1) << SCTLR_EL1_TCF0_SHIFT)
+#define SCTLR_EL1_TCF0_ASYNC	(UL(0x2) << SCTLR_EL1_TCF0_SHIFT)
+#define SCTLR_EL1_TCF0_MASK	(UL(0x3) << SCTLR_EL1_TCF0_SHIFT)
+
 #define SCTLR_EL1_BT1		(BIT(36))
 #define SCTLR_EL1_BT0		(BIT(35))
 #define SCTLR_EL1_UCI		(BIT(26))
@@ -595,6 +621,7 @@
 #define MAIR_ATTR_DEVICE_GRE		UL(0x0c)
 #define MAIR_ATTR_NORMAL_NC		UL(0x44)
 #define MAIR_ATTR_NORMAL_WT		UL(0xbb)
+#define MAIR_ATTR_NORMAL_TAGGED		UL(0xf0)
 #define MAIR_ATTR_NORMAL		UL(0xff)
 #define MAIR_ATTR_MASK			UL(0xff)
 
@@ -686,6 +713,10 @@
 #define ID_AA64PFR1_SSBS_PSTATE_INSNS	2
 #define ID_AA64PFR1_BT_BTI		0x1
 
+#define ID_AA64PFR1_MTE_NI		0x0
+#define ID_AA64PFR1_MTE_EL0		0x1
+#define ID_AA64PFR1_MTE			0x2
+
 /* id_aa64zfr0 */
 #define ID_AA64ZFR0_F64MM_SHIFT		56
 #define ID_AA64ZFR0_F32MM_SHIFT		52
@@ -920,6 +951,28 @@
 #define CPACR_EL1_ZEN_EL0EN	(BIT(17)) /* enable EL0 access, if EL1EN set */
 #define CPACR_EL1_ZEN		(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
 
+/* TCR EL1 Bit Definitions */
+#define SYS_TCR_EL1_TCMA1	(BIT(58))
+#define SYS_TCR_EL1_TCMA0	(BIT(57))
+
+/* GCR_EL1 Definitions */
+#define SYS_GCR_EL1_RRND	(BIT(16))
+#define SYS_GCR_EL1_EXCL_MASK	0xffffUL
+
+/* RGSR_EL1 Definitions */
+#define SYS_RGSR_EL1_TAG_MASK	0xfUL
+#define SYS_RGSR_EL1_SEED_SHIFT	8
+#define SYS_RGSR_EL1_SEED_MASK	0xffffUL
+
+/* GMID_EL1 field definitions */
+#define SYS_GMID_EL1_BS_SHIFT	0
+#define SYS_GMID_EL1_BS_SIZE	4
+
+/* TFSR{,E0}_EL1 bit definitions */
+#define SYS_TFSR_EL1_TF0_SHIFT	0
+#define SYS_TFSR_EL1_TF1_SHIFT	1
+#define SYS_TFSR_EL1_TF0	(UL(1) << SYS_TFSR_EL1_TF0_SHIFT)
+#define SYS_TFSR_EL1_TF1	(UK(2) << SYS_TFSR_EL1_TF1_SHIFT)
 
 /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
 #define SYS_MPIDR_SAFE_VAL	(BIT(31))
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 42cbe34d95ceeb..06413d9f234181 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -51,6 +51,7 @@
 #define PSR_PAN_BIT	0x00400000
 #define PSR_UAO_BIT	0x00800000
 #define PSR_DIT_BIT	0x01000000
+#define PSR_TCO_BIT	0x02000000
 #define PSR_V_BIT	0x10000000
 #define PSR_C_BIT	0x20000000
 #define PSR_Z_BIT	0x40000000
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index d8ebfd813e28c5..8942de814b7233 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1793,7 +1793,7 @@ void syscall_trace_exit(struct pt_regs *regs)
  * We also reserve IL for the kernel; SS is handled dynamically.
  */
 #define SPSR_EL1_AARCH64_RES0_BITS \
-	(GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \
+	(GENMASK_ULL(63, 32) | GENMASK_ULL(27, 26) | GENMASK_ULL(23, 22) | \
 	 GENMASK_ULL(20, 13) | GENMASK_ULL(5, 5))
 #define SPSR_EL1_AARCH32_RES0_BITS \
 	(GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20))

From 0178dc7613684561ff3bb1625cd5504f1e7fbe3d Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 27 Nov 2019 09:51:13 +0000
Subject: [PATCH 048/262] arm64: mte: Use Normal Tagged attributes for the
 linear map

Once user space is given access to tagged memory, the kernel must be
able to clear/save/restore tags visible to the user. This is done via
the linear mapping, therefore map it as such. The new MT_NORMAL_TAGGED
index for MAIR_EL1 is initially mapped as Normal memory and later
changed to Normal Tagged via the cpufeature infrastructure. From a
mismatched attribute aliases perspective, the Tagged memory is
considered a permission and it won't lead to undefined behaviour.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com>
---
 arch/arm64/include/asm/memory.h       |  1 +
 arch/arm64/include/asm/pgtable-prot.h |  2 ++
 arch/arm64/mm/dump.c                  |  4 ++++
 arch/arm64/mm/mmu.c                   | 20 ++++++++++++++++++--
 arch/arm64/mm/proc.S                  |  8 ++++++--
 5 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index afa722504bfde9..1e0a7826641063 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -133,6 +133,7 @@
 #define MT_NORMAL_NC		3
 #define MT_NORMAL		4
 #define MT_NORMAL_WT		5
+#define MT_NORMAL_TAGGED	6
 
 /*
  * Memory types for Stage-2 translation
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 4d867c6446c484..afd8b9fc76f2dc 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -50,6 +50,7 @@ extern bool arm64_use_ng_mappings;
 #define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
 #define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
 #define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_NORMAL_TAGGED	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED))
 
 #define PROT_SECT_DEVICE_nGnRE	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -59,6 +60,7 @@ extern bool arm64_use_ng_mappings;
 #define _HYP_PAGE_DEFAULT	_PAGE_DEFAULT
 
 #define PAGE_KERNEL		__pgprot(PROT_NORMAL)
+#define PAGE_KERNEL_TAGGED	__pgprot(PROT_NORMAL_TAGGED)
 #define PAGE_KERNEL_RO		__pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
 #define PAGE_KERNEL_ROX		__pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC	__pgprot(PROT_NORMAL & ~PTE_PXN)
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 0b8da1cc1c07e8..ba6d1d89f9b2ae 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -169,6 +169,10 @@ static const struct prot_bits pte_bits[] = {
 		.mask	= PTE_ATTRINDX_MASK,
 		.val	= PTE_ATTRINDX(MT_NORMAL),
 		.set	= "MEM/NORMAL",
+	}, {
+		.mask	= PTE_ATTRINDX_MASK,
+		.val	= PTE_ATTRINDX(MT_NORMAL_TAGGED),
+		.set	= "MEM/NORMAL-TAGGED",
 	}
 };
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 75df62fea1b68a..936c4762dadff8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -122,7 +122,7 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
 	 * The following mapping attributes may be updated in live
 	 * kernel mappings without the need for break-before-make.
 	 */
-	static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
+	pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
 
 	/* creating or taking down mappings is always safe */
 	if (old == 0 || new == 0)
@@ -136,6 +136,17 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
 	if (old & ~new & PTE_NG)
 		return false;
 
+	/*
+	 * Changing the memory type between Normal and Normal-Tagged is safe
+	 * since Tagged is considered a permission attribute from the
+	 * mismatched attribute aliases perspective.
+	 */
+	if (((old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) ||
+	     (old & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED)) &&
+	    ((new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL) ||
+	     (new & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_TAGGED)))
+		mask |= PTE_ATTRINDX_MASK;
+
 	return ((old ^ new) & ~mask) == 0;
 }
 
@@ -491,7 +502,12 @@ static void __init map_mem(pgd_t *pgdp)
 		if (memblock_is_nomap(reg))
 			continue;
 
-		__map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
+		/*
+		 * The linear map must allow allocation tags reading/writing
+		 * if MTE is present. Otherwise, it has the same attributes as
+		 * PAGE_KERNEL.
+		 */
+		__map_memblock(pgdp, start, end, PAGE_KERNEL_TAGGED, flags);
 	}
 
 	/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 796e47a571e663..4817ed52e3439c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -44,14 +44,18 @@
 #define TCR_KASAN_FLAGS 0
 #endif
 
-/* Default MAIR_EL1 */
+/*
+ * Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and
+ * changed during __cpu_setup to Normal Tagged if the system supports MTE.
+ */
 #define MAIR_EL1_SET							\
 	(MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |	\
 	 MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) |	\
 	 MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) |		\
 	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) |		\
 	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) |			\
-	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT) |		\
+	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL_TAGGED))
 
 #ifdef CONFIG_CPU_PM
 /**

From 3b714d24ef173f81c78af16f73dcc9b40428c803 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Fri, 6 Sep 2019 10:58:01 +0100
Subject: [PATCH 049/262] arm64: mte: CPU feature detection and initial sysreg
 configuration

Add the cpufeature and hwcap entries to detect the presence of MTE. Any
secondary CPU not supporting the feature, if detected on the boot CPU,
will be parked.

Add the minimum SCTLR_EL1 and HCR_EL2 bits for enabling MTE. The Normal
Tagged memory type is configured in MAIR_EL1 before the MMU is enabled
in order to avoid disrupting other CPUs in the CnP domain.

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com>
---
 arch/arm64/include/asm/cpucaps.h    |  3 ++-
 arch/arm64/include/asm/cpufeature.h |  6 ++++++
 arch/arm64/include/asm/hwcap.h      |  2 +-
 arch/arm64/include/asm/kvm_arm.h    |  2 +-
 arch/arm64/include/asm/sysreg.h     |  1 +
 arch/arm64/include/uapi/asm/hwcap.h |  2 +-
 arch/arm64/kernel/cpufeature.c      | 17 +++++++++++++++++
 arch/arm64/kernel/cpuinfo.c         |  2 +-
 arch/arm64/mm/proc.S                | 24 ++++++++++++++++++++++++
 9 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 07b643a7071005..1937653b05a339 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -64,7 +64,8 @@
 #define ARM64_BTI				54
 #define ARM64_HAS_ARMv8_4_TTL			55
 #define ARM64_HAS_TLB_RANGE			56
+#define ARM64_MTE				57
 
-#define ARM64_NCAPS				57
+#define ARM64_NCAPS				58
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 89b4f0142c2878..680b5b36ddd514 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -681,6 +681,12 @@ static __always_inline bool system_uses_irq_prio_masking(void)
 	       cpus_have_const_cap(ARM64_HAS_IRQ_PRIO_MASKING);
 }
 
+static inline bool system_supports_mte(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_MTE) &&
+		cpus_have_const_cap(ARM64_MTE);
+}
+
 static inline bool system_has_prio_mask_debugging(void)
 {
 	return IS_ENABLED(CONFIG_ARM64_DEBUG_PRIORITY_MASKING) &&
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 22f73fe0903099..0d4a6741b6a57e 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -95,7 +95,7 @@
 #define KERNEL_HWCAP_DGH		__khwcap2_feature(DGH)
 #define KERNEL_HWCAP_RNG		__khwcap2_feature(RNG)
 #define KERNEL_HWCAP_BTI		__khwcap2_feature(BTI)
-/* reserved for KERNEL_HWCAP_MTE	__khwcap2_feature(MTE) */
+#define KERNEL_HWCAP_MTE		__khwcap2_feature(MTE)
 
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 1593669cca4d4a..5aee5f79b24e76 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -79,7 +79,7 @@
 			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
 			 HCR_FMO | HCR_IMO | HCR_PTW )
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
-#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK)
+#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
 /* TCR_EL2 Registers bits */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6fa9aa477e76c4..daf030a05de09e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -613,6 +613,7 @@
 			 SCTLR_EL1_SA0  | SCTLR_EL1_SED  | SCTLR_ELx_I    |\
 			 SCTLR_EL1_DZE  | SCTLR_EL1_UCT                   |\
 			 SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
+			 SCTLR_ELx_ITFSB| SCTLR_ELx_ATA  | SCTLR_EL1_ATA0 |\
 			 ENDIAN_SET_EL1 | SCTLR_EL1_UCI  | SCTLR_EL1_RES1)
 
 /* MAIR_ELx memory attributes (used by Linux) */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 912162f735298d..b8f41aa234ee16 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -74,6 +74,6 @@
 #define HWCAP2_DGH		(1 << 15)
 #define HWCAP2_RNG		(1 << 16)
 #define HWCAP2_BTI		(1 << 17)
-/* reserved for HWCAP2_MTE	(1 << 18) */
+#define HWCAP2_MTE		(1 << 18)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6424584be01e6d..fabc8a23722360 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -227,6 +227,8 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MTE_SHIFT, 4, ID_AA64PFR1_MTE_NI),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI),
 				    FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0),
@@ -2121,6 +2123,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sign = FTR_UNSIGNED,
 	},
 #endif
+#ifdef CONFIG_ARM64_MTE
+	{
+		.desc = "Memory Tagging Extension",
+		.capability = ARM64_MTE,
+		.type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64PFR1_EL1,
+		.field_pos = ID_AA64PFR1_MTE_SHIFT,
+		.min_field_value = ID_AA64PFR1_MTE,
+		.sign = FTR_UNSIGNED,
+	},
+#endif /* CONFIG_ARM64_MTE */
 	{},
 };
 
@@ -2237,6 +2251,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
 	HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
 #endif
+#ifdef CONFIG_ARM64_MTE
+	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE),
+#endif /* CONFIG_ARM64_MTE */
 	{},
 };
 
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index d0076c2159e667..6104b87f021dca 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -93,7 +93,7 @@ static const char *const hwcap_str[] = {
 	"dgh",
 	"rng",
 	"bti",
-	/* reserved for "mte" */
+	"mte",
 	NULL
 };
 
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 4817ed52e3439c..23c326a06b2d40 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -18,6 +18,7 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative.h>
 #include <asm/smp.h>
+#include <asm/sysreg.h>
 
 #ifdef CONFIG_ARM64_64K_PAGES
 #define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
@@ -425,6 +426,29 @@ SYM_FUNC_START(__cpu_setup)
 	 * Memory region attributes
 	 */
 	mov_q	x5, MAIR_EL1_SET
+#ifdef CONFIG_ARM64_MTE
+	/*
+	 * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported
+	 * (ID_AA64PFR1_EL1[11:8] > 1).
+	 */
+	mrs	x10, ID_AA64PFR1_EL1
+	ubfx	x10, x10, #ID_AA64PFR1_MTE_SHIFT, #4
+	cmp	x10, #ID_AA64PFR1_MTE
+	b.lt	1f
+
+	/* Normal Tagged memory type at the corresponding MAIR index */
+	mov	x10, #MAIR_ATTR_NORMAL_TAGGED
+	bfi	x5, x10, #(8 *  MT_NORMAL_TAGGED), #8
+
+	/* initialize GCR_EL1: all non-zero tags excluded by default */
+	mov	x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
+	msr_s	SYS_GCR_EL1, x10
+
+	/* clear any pending tag check faults in TFSR*_EL1 */
+	msr_s	SYS_TFSR_EL1, xzr
+	msr_s	SYS_TFSRE0_EL1, xzr
+1:
+#endif
 	msr	mair_el1, x5
 	/*
 	 * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for

From eb3621798bcdd34ba480109bac357ba6a784d3e2 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Thu, 27 Aug 2020 19:17:35 +0200
Subject: [PATCH 050/262] x86/entry/64: Do not include inst.h in calling.h

inst.h was included in calling.h solely to instantiate the RDPID macro.
The usage of RDPID was removed in

  6a3ea3e68b8a ("x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM")

so remove the include.

Fixes: 6a3ea3e68b8a ("x86/entry/64: Do not use RDPID in paranoid entry to accomodate KVM")
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200827171735.93825-1-ubizjak@gmail.com
---
 arch/x86/entry/calling.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index ae9b0d4615b325..07a9331d55e73d 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -6,7 +6,6 @@
 #include <asm/percpu.h>
 #include <asm/asm-offsets.h>
 #include <asm/processor-flags.h>
-#include <asm/inst.h>
 
 /*
 

From 2ac638fc5724f011f8ba1b425667c5592e1571ce Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 26 Aug 2020 18:52:59 +0100
Subject: [PATCH 051/262] arm64: kvm: mte: Hide the MTE CPUID information from
 the guests

KVM does not support MTE in guests yet, so clear the corresponding field
in the ID_AA64PFR1_EL1 register. In addition, inject an undefined
exception in the guest if it accesses one of the GCR_EL1, RGSR_EL1,
TFSR_EL1 or TFSRE0_EL1 registers. While the emulate_sys_reg() function
already injects an undefined exception, this patch prevents the
unnecessary printk.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Steven Price <steven.price@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/sys_regs.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 077293b5115fa0..379f4969d0bd7e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1131,6 +1131,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 		if (!vcpu_has_sve(vcpu))
 			val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
 		val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT);
+	} else if (id == SYS_ID_AA64PFR1_EL1) {
+		val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT);
 	} else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) {
 		val &= ~((0xfUL << ID_AA64ISAR1_APA_SHIFT) |
 			 (0xfUL << ID_AA64ISAR1_API_SHIFT) |
@@ -1382,6 +1384,13 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static bool access_mte_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
+			    const struct sys_reg_desc *r)
+{
+	kvm_inject_undefined(vcpu);
+	return false;
+}
+
 /* sys_reg_desc initialiser for known cpufeature ID registers */
 #define ID_SANITISED(name) {			\
 	SYS_DESC(SYS_##name),			\
@@ -1547,6 +1556,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 },
 	{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
 	{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
+
+	{ SYS_DESC(SYS_RGSR_EL1), access_mte_regs },
+	{ SYS_DESC(SYS_GCR_EL1), access_mte_regs },
+
 	{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
 	{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
 	{ SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
@@ -1571,6 +1584,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
 	{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
 
+	{ SYS_DESC(SYS_TFSR_EL1), access_mte_regs },
+	{ SYS_DESC(SYS_TFSRE0_EL1), access_mte_regs },
+
 	{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
 	{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
 

From 74f1082487feb90bbf880af14beb8e29c3030c9f Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Wed, 7 Aug 2019 12:21:05 +0100
Subject: [PATCH 052/262] arm64: mte: Add specific SIGSEGV codes

Add MTE-specific SIGSEGV codes to siginfo.h and update the x86
BUILD_BUG_ON(NSIGSEGV != 7) compile check.

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
[catalin.marinas@arm.com: renamed precise/imprecise to sync/async]
[catalin.marinas@arm.com: dropped #ifdef __aarch64__, renumbered]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Will Deacon <will@kernel.org>
---
 arch/x86/kernel/signal_compat.c    | 2 +-
 include/uapi/asm-generic/siginfo.h | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 9ccbf0576cd0eb..a7f3e12cfbdb8b 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -27,7 +27,7 @@ static inline void signal_compat_build_tests(void)
 	 */
 	BUILD_BUG_ON(NSIGILL  != 11);
 	BUILD_BUG_ON(NSIGFPE  != 15);
-	BUILD_BUG_ON(NSIGSEGV != 7);
+	BUILD_BUG_ON(NSIGSEGV != 9);
 	BUILD_BUG_ON(NSIGBUS  != 5);
 	BUILD_BUG_ON(NSIGTRAP != 5);
 	BUILD_BUG_ON(NSIGCHLD != 6);
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index cb3d6c2671812b..7aacf9389010f9 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -229,7 +229,9 @@ typedef struct siginfo {
 #define SEGV_ACCADI	5	/* ADI not enabled for mapped object */
 #define SEGV_ADIDERR	6	/* Disrupting MCD error */
 #define SEGV_ADIPERR	7	/* Precise MCD exception */
-#define NSIGSEGV	7
+#define SEGV_MTEAERR	8	/* Asynchronous ARM MTE error */
+#define SEGV_MTESERR	9	/* Synchronous ARM MTE exception */
+#define NSIGSEGV	9
 
 /*
  * SIGBUS si_codes

From 637ec831ea4f09c7529ac4078399ce4e25b46341 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Mon, 16 Sep 2019 11:51:17 +0100
Subject: [PATCH 053/262] arm64: mte: Handle synchronous and asynchronous tag
 check faults

The Memory Tagging Extension has two modes of notifying a tag check
fault at EL0, configurable through the SCTLR_EL1.TCF0 field:

1. Synchronous raising of a Data Abort exception with DFSC 17.
2. Asynchronous setting of a cumulative bit in TFSRE0_EL1.

Add the exception handler for the synchronous exception and handling of
the asynchronous TFSRE0_EL1.TF0 bit setting via a new TIF flag in
do_notify_resume().

On a tag check failure in user-space, whether synchronous or
asynchronous, a SIGSEGV will be raised on the faulting thread.

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte.h         | 23 +++++++++++++++++
 arch/arm64/include/asm/thread_info.h |  4 ++-
 arch/arm64/kernel/Makefile           |  1 +
 arch/arm64/kernel/entry.S            | 37 ++++++++++++++++++++++++++++
 arch/arm64/kernel/mte.c              | 21 ++++++++++++++++
 arch/arm64/kernel/process.c          |  8 +++++-
 arch/arm64/kernel/signal.c           |  9 +++++++
 arch/arm64/kernel/syscall.c          | 10 ++++++++
 arch/arm64/mm/fault.c                |  9 ++++++-
 9 files changed, 119 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm64/include/asm/mte.h
 create mode 100644 arch/arm64/kernel/mte.c

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
new file mode 100644
index 00000000000000..a0bf310da74b42
--- /dev/null
+++ b/arch/arm64/include/asm/mte.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_MTE_H
+#define __ASM_MTE_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_ARM64_MTE
+
+void flush_mte_state(void);
+
+#else
+
+static inline void flush_mte_state(void)
+{
+}
+
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_MTE_H  */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 5e784e16ee8958..1fbab854a51b0e 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -67,6 +67,7 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
 #define TIF_FSCHECK		5	/* Check FS is USER_DS on return */
+#define TIF_MTE_ASYNC_FAULT	6	/* MTE Asynchronous Tag Check Fault */
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
 #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
@@ -96,10 +97,11 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_SVE		(1 << TIF_SVE)
+#define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-				 _TIF_UPROBE | _TIF_FSCHECK)
+				 _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT)
 
 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index a561cbb91d4dc5..5fb9b728459b88 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_ARM_SDE_INTERFACE)		+= sdei.o
 obj-$(CONFIG_ARM64_SSBD)		+= ssbd.o
 obj-$(CONFIG_ARM64_PTR_AUTH)		+= pointer_auth.o
 obj-$(CONFIG_SHADOW_CALL_STACK)		+= scs.o
+obj-$(CONFIG_ARM64_MTE)			+= mte.o
 
 obj-y					+= vdso/ probes/
 obj-$(CONFIG_COMPAT_VDSO)		+= vdso32/
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 55af8b504b65ab..ff34461524d4cd 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -149,6 +149,32 @@ alternative_cb_end
 #endif
 	.endm
 
+	/* Check for MTE asynchronous tag check faults */
+	.macro check_mte_async_tcf, flgs, tmp
+#ifdef CONFIG_ARM64_MTE
+alternative_if_not ARM64_MTE
+	b	1f
+alternative_else_nop_endif
+	mrs_s	\tmp, SYS_TFSRE0_EL1
+	tbz	\tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f
+	/* Asynchronous TCF occurred for TTBR0 access, set the TI flag */
+	orr	\flgs, \flgs, #_TIF_MTE_ASYNC_FAULT
+	str	\flgs, [tsk, #TSK_TI_FLAGS]
+	msr_s	SYS_TFSRE0_EL1, xzr
+1:
+#endif
+	.endm
+
+	/* Clear the MTE asynchronous tag check faults */
+	.macro clear_mte_async_tcf
+#ifdef CONFIG_ARM64_MTE
+alternative_if ARM64_MTE
+	dsb	ish
+	msr_s	SYS_TFSRE0_EL1, xzr
+alternative_else_nop_endif
+#endif
+	.endm
+
 	.macro	kernel_entry, el, regsize = 64
 	.if	\regsize == 32
 	mov	w0, w0				// zero upper 32 bits of x0
@@ -182,6 +208,8 @@ alternative_cb_end
 	ldr	x19, [tsk, #TSK_TI_FLAGS]
 	disable_step_tsk x19, x20
 
+	/* Check for asynchronous tag check faults in user space */
+	check_mte_async_tcf x19, x22
 	apply_ssbd 1, x22, x23
 
 	ptrauth_keys_install_kernel tsk, x20, x22, x23
@@ -233,6 +261,13 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
 	str	x20, [sp, #S_PMR_SAVE]
 alternative_else_nop_endif
 
+	/* Re-enable tag checking (TCO set on exception entry) */
+#ifdef CONFIG_ARM64_MTE
+alternative_if ARM64_MTE
+	SET_PSTATE_TCO(0)
+alternative_else_nop_endif
+#endif
+
 	/*
 	 * Registers that may be useful after this macro is invoked:
 	 *
@@ -744,6 +779,8 @@ SYM_CODE_START_LOCAL(ret_to_user)
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
 finish_ret_to_user:
+	/* Ignore asynchronous tag check faults in the uaccess routines */
+	clear_mte_async_tcf
 	enable_step_tsk x1, x2
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	bl	stackleak_erase
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
new file mode 100644
index 00000000000000..032016823957ba
--- /dev/null
+++ b/arch/arm64/kernel/mte.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+
+#include <linux/thread_info.h>
+
+#include <asm/cpufeature.h>
+#include <asm/mte.h>
+#include <asm/sysreg.h>
+
+void flush_mte_state(void)
+{
+	if (!system_supports_mte())
+		return;
+
+	/* clear any pending asynchronous tag fault */
+	dsb(ish);
+	write_sysreg_s(0, SYS_TFSRE0_EL1);
+	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index f1804496b93508..a49028efab6865 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
 #include <asm/exec.h>
 #include <asm/fpsimd.h>
 #include <asm/mmu_context.h>
+#include <asm/mte.h>
 #include <asm/processor.h>
 #include <asm/pointer_auth.h>
 #include <asm/stacktrace.h>
@@ -239,7 +240,7 @@ static void print_pstate(struct pt_regs *regs)
 		const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >>
 					       PSR_BTYPE_SHIFT];
 
-		printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO BTYPE=%s)\n",
+		printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO BTYPE=%s)\n",
 			pstate,
 			pstate & PSR_N_BIT ? 'N' : 'n',
 			pstate & PSR_Z_BIT ? 'Z' : 'z',
@@ -251,6 +252,7 @@ static void print_pstate(struct pt_regs *regs)
 			pstate & PSR_F_BIT ? 'F' : 'f',
 			pstate & PSR_PAN_BIT ? '+' : '-',
 			pstate & PSR_UAO_BIT ? '+' : '-',
+			pstate & PSR_TCO_BIT ? '+' : '-',
 			btype_str);
 	}
 }
@@ -336,6 +338,7 @@ void flush_thread(void)
 	tls_thread_flush();
 	flush_ptrace_hw_breakpoint(current);
 	flush_tagged_addr_state();
+	flush_mte_state();
 }
 
 void release_thread(struct task_struct *dead_task)
@@ -368,6 +371,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	dst->thread.sve_state = NULL;
 	clear_tsk_thread_flag(dst, TIF_SVE);
 
+	/* clear any pending asynchronous tag fault raised by the parent */
+	clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
+
 	return 0;
 }
 
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 3b4f31f35e4585..b27e87572ce324 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -748,6 +748,9 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
 		regs->pstate |= PSR_BTYPE_C;
 	}
 
+	/* TCO (Tag Check Override) always cleared for signal handlers */
+	regs->pstate &= ~PSR_TCO_BIT;
+
 	if (ka->sa.sa_flags & SA_RESTORER)
 		sigtramp = ka->sa.sa_restorer;
 	else
@@ -932,6 +935,12 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 			if (thread_flags & _TIF_UPROBE)
 				uprobe_notify_resume(regs);
 
+			if (thread_flags & _TIF_MTE_ASYNC_FAULT) {
+				clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+				send_sig_fault(SIGSEGV, SEGV_MTEAERR,
+					       (void __user *)NULL, current);
+			}
+
 			if (thread_flags & _TIF_SIGPENDING)
 				do_signal(regs);
 
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 5f0c04863d2c19..e4c0dadf0d92c8 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -123,6 +123,16 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 	local_daif_restore(DAIF_PROCCTX);
 	user_exit();
 
+	if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) {
+		/*
+		 * Process the asynchronous tag check fault before the actual
+		 * syscall. do_notify_resume() will send a signal to userspace
+		 * before the syscall is restarted.
+		 */
+		regs->regs[0] = -ERESTARTNOINTR;
+		return;
+	}
+
 	if (has_syscall_work(flags)) {
 		/*
 		 * The de-facto standard way to skip a system call using ptrace
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f07333e86c2ff9..a3bd189602df9b 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -641,6 +641,13 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 	return 0;
 }
 
+static int do_tag_check_fault(unsigned long addr, unsigned int esr,
+			      struct pt_regs *regs)
+{
+	do_bad_area(addr, esr, regs);
+	return 0;
+}
+
 static const struct fault_info fault_info[] = {
 	{ do_bad,		SIGKILL, SI_KERNEL,	"ttbr address size fault"	},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"level 1 address size fault"	},
@@ -659,7 +666,7 @@ static const struct fault_info fault_info[] = {
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
 	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
 	{ do_sea,		SIGBUS,  BUS_OBJERR,	"synchronous external abort"	},
-	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 17"			},
+	{ do_tag_check_fault,	SIGSEGV, SEGV_MTESERR,	"synchronous tag check fault"	},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 18"			},
 	{ do_bad,		SIGKILL, SI_KERNEL,	"unknown 19"			},
 	{ do_sea,		SIGKILL, SI_KERNEL,	"level 0 (translation table walk)"	},

From 4beba9486abd2f86d125271d6946f7c38ed0fe77 Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Wed, 22 Apr 2020 15:25:27 +0100
Subject: [PATCH 054/262] mm: Add PG_arch_2 page flag

For arm64 MTE support it is necessary to be able to mark pages that
contain user space visible tags that will need to be saved/restored e.g.
when swapped out.

To support this add a new arch specific flag (PG_arch_2). This flag is
only available on 64-bit architectures due to the limited number of
spare page flags on the 32-bit ones.

Signed-off-by: Steven Price <steven.price@arm.com>
[catalin.marinas@arm.com: use CONFIG_64BIT for guarding this new flag]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 fs/proc/page.c                    | 3 +++
 include/linux/kernel-page-flags.h | 1 +
 include/linux/page-flags.h        | 3 +++
 include/trace/events/mmflags.h    | 9 ++++++++-
 tools/vm/page-types.c             | 2 ++
 5 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/fs/proc/page.c b/fs/proc/page.c
index f909243d4a669c..9f1077d94cde16 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -217,6 +217,9 @@ u64 stable_page_flags(struct page *page)
 	u |= kpf_copy_bit(k, KPF_PRIVATE_2,	PG_private_2);
 	u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE,	PG_owner_priv_1);
 	u |= kpf_copy_bit(k, KPF_ARCH,		PG_arch_1);
+#ifdef CONFIG_64BIT
+	u |= kpf_copy_bit(k, KPF_ARCH_2,	PG_arch_2);
+#endif
 
 	return u;
 };
diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h
index abd20ef93c98b2..eee1877a354e58 100644
--- a/include/linux/kernel-page-flags.h
+++ b/include/linux/kernel-page-flags.h
@@ -17,5 +17,6 @@
 #define KPF_ARCH		38
 #define KPF_UNCACHED		39
 #define KPF_SOFTDIRTY		40
+#define KPF_ARCH_2		41
 
 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6be1aa559b1e4c..276140c94f4a80 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -135,6 +135,9 @@ enum pageflags {
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
 	PG_young,
 	PG_idle,
+#endif
+#ifdef CONFIG_64BIT
+	PG_arch_2,
 #endif
 	__NR_PAGEFLAGS,
 
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 5fb75203438636..67018d367b9f46 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -79,6 +79,12 @@
 #define IF_HAVE_PG_IDLE(flag,string)
 #endif
 
+#ifdef CONFIG_64BIT
+#define IF_HAVE_PG_ARCH_2(flag,string) ,{1UL << flag, string}
+#else
+#define IF_HAVE_PG_ARCH_2(flag,string)
+#endif
+
 #define __def_pageflag_names						\
 	{1UL << PG_locked,		"locked"	},		\
 	{1UL << PG_waiters,		"waiters"	},		\
@@ -105,7 +111,8 @@ IF_HAVE_PG_MLOCK(PG_mlocked,		"mlocked"	)		\
 IF_HAVE_PG_UNCACHED(PG_uncached,	"uncached"	)		\
 IF_HAVE_PG_HWPOISON(PG_hwpoison,	"hwpoison"	)		\
 IF_HAVE_PG_IDLE(PG_young,		"young"		)		\
-IF_HAVE_PG_IDLE(PG_idle,		"idle"		)
+IF_HAVE_PG_IDLE(PG_idle,		"idle"		)		\
+IF_HAVE_PG_ARCH_2(PG_arch_2,		"arch_2"	)
 
 #define show_page_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",				\
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 58c0eab71bca34..0517c744b04e8d 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -78,6 +78,7 @@
 #define KPF_ARCH		38
 #define KPF_UNCACHED		39
 #define KPF_SOFTDIRTY		40
+#define KPF_ARCH_2		41
 
 /* [48-] take some arbitrary free slots for expanding overloaded flags
  * not part of kernel API
@@ -135,6 +136,7 @@ static const char * const page_flag_names[] = {
 	[KPF_ARCH]		= "h:arch",
 	[KPF_UNCACHED]		= "c:uncached",
 	[KPF_SOFTDIRTY]		= "f:softdirty",
+	[KPF_ARCH_2]		= "H:arch_2",
 
 	[KPF_READAHEAD]		= "I:readahead",
 	[KPF_SLOB_FREE]		= "P:slob_free",

From 72e6afa08e988744822f9bf18043fc04c4df2178 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 2 Jul 2020 10:19:30 +0100
Subject: [PATCH 055/262] mm: Preserve the PG_arch_2 flag in
 __split_huge_page_tail()

When a huge page is split into normal pages, part of the head page flags
are transferred to the tail pages. However, the PG_arch_* flags are not
part of the preserved set.

PG_arch_2 is used by the arm64 MTE support to mark pages that have valid
tags. The absence of such flag would cause the arm64 set_pte_at() to
clear the tags in order to avoid stale tags exposed to user or the
swapping out hooks to ignore the tags. Not preserving PG_arch_2 on huge
page splitting leads to tag corruption in the tail pages.

Preserve the newly added PG_arch_2 flag in __split_huge_page_tail().

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 mm/huge_memory.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2ccff8472cd4b6..1a5773c95f533c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2337,6 +2337,9 @@ static void __split_huge_page_tail(struct page *head, int tail,
 			 (1L << PG_workingset) |
 			 (1L << PG_locked) |
 			 (1L << PG_unevictable) |
+#ifdef CONFIG_64BIT
+			 (1L << PG_arch_2) |
+#endif
 			 (1L << PG_dirty)));
 
 	/* ->mapping in first tail page is compound_mapcount */

From 34bfeea4a9e9cdae713637541f240c3adfdfede3 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 4 May 2020 14:42:36 +0100
Subject: [PATCH 056/262] arm64: mte: Clear the tags when a page is mapped in
 user-space with PROT_MTE

Pages allocated by the kernel are not guaranteed to have the tags
zeroed, especially as the kernel does not (yet) use MTE itself. To
ensure the user can still access such pages when mapped into its address
space, clear the tags via set_pte_at(). A new page flag - PG_mte_tagged
(PG_arch_2) - is used to track pages with valid allocation tags.

Since the zero page is mapped as pte_special(), it won't be covered by
the above set_pte_at() mechanism. Clear its tags during early MTE
initialisation.

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte.h     | 16 +++++++++++++++
 arch/arm64/include/asm/pgtable.h |  7 +++++++
 arch/arm64/kernel/cpufeature.c   | 18 +++++++++++++++++
 arch/arm64/kernel/mte.c          | 14 +++++++++++++
 arch/arm64/lib/Makefile          |  2 ++
 arch/arm64/lib/mte.S             | 34 ++++++++++++++++++++++++++++++++
 6 files changed, 91 insertions(+)
 create mode 100644 arch/arm64/lib/mte.S

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index a0bf310da74b42..1716b3d0248907 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -7,12 +7,28 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/page-flags.h>
+
+#include <asm/pgtable-types.h>
+
+void mte_clear_page_tags(void *addr);
+
 #ifdef CONFIG_ARM64_MTE
 
+/* track which pages have valid allocation tags */
+#define PG_mte_tagged	PG_arch_2
+
+void mte_sync_tags(pte_t *ptep, pte_t pte);
 void flush_mte_state(void);
 
 #else
 
+/* unused if !CONFIG_ARM64_MTE, silence the compiler */
+#define PG_mte_tagged	0
+
+static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
+{
+}
 static inline void flush_mte_state(void)
 {
 }
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d5d3fbe739534f..0a205a8e91b2e2 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -9,6 +9,7 @@
 #include <asm/proc-fns.h>
 
 #include <asm/memory.h>
+#include <asm/mte.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable-prot.h>
 #include <asm/tlbflush.h>
@@ -90,6 +91,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
 #define pte_devmap(pte)		(!!(pte_val(pte) & PTE_DEVMAP))
+#define pte_tagged(pte)		((pte_val(pte) & PTE_ATTRINDX_MASK) == \
+				 PTE_ATTRINDX(MT_NORMAL_TAGGED))
 
 #define pte_cont_addr_end(addr, end)						\
 ({	unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK;	\
@@ -284,6 +287,10 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
 		__sync_icache_dcache(pte);
 
+	if (system_supports_mte() &&
+	    pte_present(pte) && pte_tagged(pte) && !pte_special(pte))
+		mte_sync_tags(ptep, pte);
+
 	__check_racy_pte_update(mm, ptep, pte);
 
 	set_pte(ptep, pte);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index fabc8a23722360..add9da5d8ea340 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -75,6 +75,7 @@
 #include <asm/cpu_ops.h>
 #include <asm/fpsimd.h>
 #include <asm/mmu_context.h>
+#include <asm/mte.h>
 #include <asm/processor.h>
 #include <asm/sysreg.h>
 #include <asm/traps.h>
@@ -1704,6 +1705,22 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused)
 }
 #endif /* CONFIG_ARM64_BTI */
 
+#ifdef CONFIG_ARM64_MTE
+static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
+{
+	static bool cleared_zero_page = false;
+
+	/*
+	 * Clear the tags in the zero page. This needs to be done via the
+	 * linear map which has the Tagged attribute.
+	 */
+	if (!cleared_zero_page) {
+		cleared_zero_page = true;
+		mte_clear_page_tags(lm_alias(empty_zero_page));
+	}
+}
+#endif /* CONFIG_ARM64_MTE */
+
 /* Internal helper functions to match cpu capability type */
 static bool
 cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2133,6 +2150,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_pos = ID_AA64PFR1_MTE_SHIFT,
 		.min_field_value = ID_AA64PFR1_MTE,
 		.sign = FTR_UNSIGNED,
+		.cpu_enable = cpu_enable_mte,
 	},
 #endif /* CONFIG_ARM64_MTE */
 	{},
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 032016823957ba..5bf9bbed5a252e 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -3,12 +3,26 @@
  * Copyright (C) 2020 ARM Ltd.
  */
 
+#include <linux/bitops.h>
+#include <linux/mm.h>
 #include <linux/thread_info.h>
 
 #include <asm/cpufeature.h>
 #include <asm/mte.h>
 #include <asm/sysreg.h>
 
+void mte_sync_tags(pte_t *ptep, pte_t pte)
+{
+	struct page *page = pte_page(pte);
+	long i, nr_pages = compound_nr(page);
+
+	/* if PG_mte_tagged is set, tags have already been initialised */
+	for (i = 0; i < nr_pages; i++, page++) {
+		if (!test_and_set_bit(PG_mte_tagged, &page->flags))
+			mte_clear_page_tags(page_address(page));
+	}
+}
+
 void flush_mte_state(void)
 {
 	if (!system_supports_mte())
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 2fc253466dbf8a..d31e1169d9b8e9 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -16,3 +16,5 @@ lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
 obj-$(CONFIG_CRC32) += crc32.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+obj-$(CONFIG_ARM64_MTE) += mte.o
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
new file mode 100644
index 00000000000000..a367056400862a
--- /dev/null
+++ b/arch/arm64/lib/mte.S
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/sysreg.h>
+
+	.arch	armv8.5-a+memtag
+
+/*
+ * multitag_transfer_size - set \reg to the block size that is accessed by the
+ * LDGM/STGM instructions.
+ */
+	.macro	multitag_transfer_size, reg, tmp
+	mrs_s	\reg, SYS_GMID_EL1
+	ubfx	\reg, \reg, #SYS_GMID_EL1_BS_SHIFT, #SYS_GMID_EL1_BS_SIZE
+	mov	\tmp, #4
+	lsl	\reg, \tmp, \reg
+	.endm
+
+/*
+ * Clear the tags in a page
+ *   x0 - address of the page to be cleared
+ */
+SYM_FUNC_START(mte_clear_page_tags)
+	multitag_transfer_size x1, x2
+1:	stgm	xzr, [x0]
+	add	x0, x0, x1
+	tst	x0, #(PAGE_SIZE - 1)
+	b.ne	1b
+	ret
+SYM_FUNC_END(mte_clear_page_tags)

From 2563776b41c3190849c6b011c72b47bff314963d Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Tue, 6 Aug 2019 11:37:53 +0100
Subject: [PATCH 057/262] arm64: mte: Tags-aware copy_{user_,}highpage()
 implementations

When the Memory Tagging Extension is enabled, the tags need to be
preserved across page copy (e.g. for copy-on-write, page migration).

Introduce MTE-aware copy_{user_,}highpage() functions to copy tags to
the destination if the source page has the PG_mte_tagged flag set.
copy_user_page() does not need to handle tag copying since, with this
patch, it is only called by the DAX code where there is no source page
structure (and no source tags).

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte.h  |  4 ++++
 arch/arm64/include/asm/page.h | 14 +++++++++++---
 arch/arm64/lib/mte.S          | 19 +++++++++++++++++++
 arch/arm64/mm/copypage.c      | 25 +++++++++++++++++++++----
 4 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 1716b3d0248907..b2577eee62c225 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -19,6 +19,7 @@ void mte_clear_page_tags(void *addr);
 #define PG_mte_tagged	PG_arch_2
 
 void mte_sync_tags(pte_t *ptep, pte_t pte);
+void mte_copy_page_tags(void *kto, const void *kfrom);
 void flush_mte_state(void);
 
 #else
@@ -29,6 +30,9 @@ void flush_mte_state(void);
 static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
 {
 }
+static inline void mte_copy_page_tags(void *kto, const void *kfrom)
+{
+}
 static inline void flush_mte_state(void)
 {
 }
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index c01b52add37719..11734ce2970280 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -15,18 +15,26 @@
 #include <linux/personality.h> /* for READ_IMPLIES_EXEC */
 #include <asm/pgtable-types.h>
 
+struct page;
+struct vm_area_struct;
+
 extern void __cpu_clear_user_page(void *p, unsigned long user);
-extern void __cpu_copy_user_page(void *to, const void *from,
-				 unsigned long user);
 extern void copy_page(void *to, const void *from);
 extern void clear_page(void *to);
 
+void copy_user_highpage(struct page *to, struct page *from,
+			unsigned long vaddr, struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
+
+void copy_highpage(struct page *to, struct page *from);
+#define __HAVE_ARCH_COPY_HIGHPAGE
+
 #define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
 	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
 #define clear_user_page(addr,vaddr,pg)  __cpu_clear_user_page(addr, vaddr)
-#define copy_user_page(to,from,vaddr,pg) __cpu_copy_user_page(to, from, vaddr)
+#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
 typedef struct page *pgtable_t;
 
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index a367056400862a..3c3d0edbbca373 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -5,6 +5,7 @@
 #include <linux/linkage.h>
 
 #include <asm/assembler.h>
+#include <asm/page.h>
 #include <asm/sysreg.h>
 
 	.arch	armv8.5-a+memtag
@@ -32,3 +33,21 @@ SYM_FUNC_START(mte_clear_page_tags)
 	b.ne	1b
 	ret
 SYM_FUNC_END(mte_clear_page_tags)
+
+/*
+ * Copy the tags from the source page to the destination one
+ *   x0 - address of the destination page
+ *   x1 - address of the source page
+ */
+SYM_FUNC_START(mte_copy_page_tags)
+	mov	x2, x0
+	mov	x3, x1
+	multitag_transfer_size x5, x6
+1:	ldgm	x4, [x3]
+	stgm	x4, [x2]
+	add	x2, x2, x5
+	add	x3, x3, x5
+	tst	x2, #(PAGE_SIZE - 1)
+	b.ne	1b
+	ret
+SYM_FUNC_END(mte_copy_page_tags)
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 2ee7b73433a580..4a2233fa674eef 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -6,18 +6,35 @@
  * Copyright (C) 2012 ARM Ltd.
  */
 
+#include <linux/bitops.h>
 #include <linux/mm.h>
 
 #include <asm/page.h>
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/mte.h>
 
-void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
+void copy_highpage(struct page *to, struct page *from)
 {
-	struct page *page = virt_to_page(kto);
+	struct page *kto = page_address(to);
+	struct page *kfrom = page_address(from);
+
 	copy_page(kto, kfrom);
-	flush_dcache_page(page);
+
+	if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) {
+		set_bit(PG_mte_tagged, &to->flags);
+		mte_copy_page_tags(kto, kfrom);
+	}
+}
+EXPORT_SYMBOL(copy_highpage);
+
+void copy_user_highpage(struct page *to, struct page *from,
+			unsigned long vaddr, struct vm_area_struct *vma)
+{
+	copy_highpage(to, from);
+	flush_dcache_page(to);
 }
-EXPORT_SYMBOL_GPL(__cpu_copy_user_page);
+EXPORT_SYMBOL_GPL(copy_user_highpage);
 
 void __cpu_clear_user_page(void *kaddr, unsigned long vaddr)
 {

From 738c8780fc1fa11fb996a962a54703d0450cae59 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Sun, 21 Jun 2020 11:41:27 +0100
Subject: [PATCH 058/262] arm64: Avoid unnecessary clear_user_page()
 indirection

Since clear_user_page() calls clear_page() directly, avoid the
unnecessary indirection.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/page.h | 3 +--
 arch/arm64/mm/copypage.c      | 6 ------
 2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 11734ce2970280..d918cb1d83a662 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -18,7 +18,6 @@
 struct page;
 struct vm_area_struct;
 
-extern void __cpu_clear_user_page(void *p, unsigned long user);
 extern void copy_page(void *to, const void *from);
 extern void clear_page(void *to);
 
@@ -33,7 +32,7 @@ void copy_highpage(struct page *to, struct page *from);
 	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
-#define clear_user_page(addr,vaddr,pg)  __cpu_clear_user_page(addr, vaddr)
+#define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
 typedef struct page *pgtable_t;
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 4a2233fa674eef..70a71f38b6a9e4 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -35,9 +35,3 @@ void copy_user_highpage(struct page *to, struct page *from,
 	flush_dcache_page(to);
 }
 EXPORT_SYMBOL_GPL(copy_user_highpage);
-
-void __cpu_clear_user_page(void *kaddr, unsigned long vaddr)
-{
-	clear_page(kaddr);
-}
-EXPORT_SYMBOL_GPL(__cpu_clear_user_page);

From 4d1a8a2dc0f4cdc2d74491a60709e698f85daf55 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 27 Nov 2019 09:53:44 +0000
Subject: [PATCH 059/262] arm64: mte: Tags-aware aware memcmp_pages()
 implementation

When the Memory Tagging Extension is enabled, two pages are identical
only if both their data and tags are identical.

Make the generic memcmp_pages() a __weak function and add an
arm64-specific implementation which returns non-zero if any of the two
pages contain valid MTE tags (PG_mte_tagged set). There isn't much
benefit in comparing the tags of two pages since these are normally used
for heap allocations and likely to differ anyway.

Co-developed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/mte.c | 26 ++++++++++++++++++++++++++
 mm/util.c               |  2 +-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 5bf9bbed5a252e..5f54fd140610a7 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -5,6 +5,7 @@
 
 #include <linux/bitops.h>
 #include <linux/mm.h>
+#include <linux/string.h>
 #include <linux/thread_info.h>
 
 #include <asm/cpufeature.h>
@@ -23,6 +24,31 @@ void mte_sync_tags(pte_t *ptep, pte_t pte)
 	}
 }
 
+int memcmp_pages(struct page *page1, struct page *page2)
+{
+	char *addr1, *addr2;
+	int ret;
+
+	addr1 = page_address(page1);
+	addr2 = page_address(page2);
+	ret = memcmp(addr1, addr2, PAGE_SIZE);
+
+	if (!system_supports_mte() || ret)
+		return ret;
+
+	/*
+	 * If the page content is identical but at least one of the pages is
+	 * tagged, return non-zero to avoid KSM merging. If only one of the
+	 * pages is tagged, set_pte_at() may zero or change the tags of the
+	 * other page via mte_sync_tags().
+	 */
+	if (test_bit(PG_mte_tagged, &page1->flags) ||
+	    test_bit(PG_mte_tagged, &page2->flags))
+		return addr1 != addr2;
+
+	return ret;
+}
+
 void flush_mte_state(void)
 {
 	if (!system_supports_mte())
diff --git a/mm/util.c b/mm/util.c
index 5ef378a2a038b5..4e21fe7eae278f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -957,7 +957,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen)
 	return res;
 }
 
-int memcmp_pages(struct page *page1, struct page *page2)
+int __weak memcmp_pages(struct page *page1, struct page *page2)
 {
 	char *addr1, *addr2;
 	int ret;

From b3fbbea4c00220f62e6f7e2514466e6ee81f7f60 Mon Sep 17 00:00:00 2001
From: Kevin Brodsky <Kevin.Brodsky@arm.com>
Date: Mon, 25 Nov 2019 17:27:06 +0000
Subject: [PATCH 060/262] mm: Introduce arch_calc_vm_flag_bits()

Similarly to arch_calc_vm_prot_bits(), introduce a dummy
arch_calc_vm_flag_bits() invoked from calc_vm_flag_bits(). This macro
can be overridden by architectures to insert specific VM_* flags derived
from the mmap() MAP_* flags.

Signed-off-by: Kevin Brodsky <Kevin.Brodsky@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mman.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/linux/mman.h b/include/linux/mman.h
index 6f34c33075f947..6fa15c9b12af3a 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -78,13 +78,18 @@ static inline void vm_unacct_memory(long pages)
 }
 
 /*
- * Allow architectures to handle additional protection bits
+ * Allow architectures to handle additional protection and flag bits. The
+ * overriding macros must be defined in the arch-specific asm/mman.h file.
  */
 
 #ifndef arch_calc_vm_prot_bits
 #define arch_calc_vm_prot_bits(prot, pkey) 0
 #endif
 
+#ifndef arch_calc_vm_flag_bits
+#define arch_calc_vm_flag_bits(flags) 0
+#endif
+
 #ifndef arch_vm_get_page_prot
 #define arch_vm_get_page_prot(vm_flags) __pgprot(0)
 #endif
@@ -135,7 +140,8 @@ calc_vm_flag_bits(unsigned long flags)
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
 	       _calc_vm_trans(flags, MAP_DENYWRITE,  VM_DENYWRITE ) |
 	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    ) |
-	       _calc_vm_trans(flags, MAP_SYNC,	     VM_SYNC      );
+	       _calc_vm_trans(flags, MAP_SYNC,	     VM_SYNC      ) |
+	       arch_calc_vm_flag_bits(flags);
 }
 
 unsigned long vm_commit_limit(void);

From 9f3419315f3cdc41a7318e4d50ba18a592b30c8c Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 27 Nov 2019 10:00:27 +0000
Subject: [PATCH 061/262] arm64: mte: Add PROT_MTE support to mmap() and
 mprotect()

To enable tagging on a memory range, the user must explicitly opt in via
a new PROT_MTE flag passed to mmap() or mprotect(). Since this is a new
memory type in the AttrIndx field of a pte, simplify the or'ing of these
bits over the protection_map[] attributes by making MT_NORMAL index 0.

There are two conditions for arch_vm_get_page_prot() to return the
MT_NORMAL_TAGGED memory type: (1) the user requested it via PROT_MTE,
registered as VM_MTE in the vm_flags, and (2) the vma supports MTE,
decided during the mmap() call (only) and registered as VM_MTE_ALLOWED.

arch_calc_vm_prot_bits() is responsible for registering the user request
as VM_MTE. The newly introduced arch_calc_vm_flag_bits() sets
VM_MTE_ALLOWED if the mapping is MAP_ANONYMOUS. An MTE-capable
filesystem (RAM-based) may be able to set VM_MTE_ALLOWED during its
mmap() file ops call.

In addition, update VM_DATA_DEFAULT_FLAGS to allow mprotect(PROT_MTE) on
stack or brk area.

The Linux mmap() syscall currently ignores unknown PROT_* flags. In the
presence of MTE, an mmap(PROT_MTE) on a file which does not support MTE
will not report an error and the memory will not be mapped as Normal
Tagged. For consistency, mprotect(PROT_MTE) will not report an error
either if the memory range does not support MTE. Two subsequent patches
in the series will propose tightening of this behaviour.

Co-developed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/memory.h    | 18 +++++++-----
 arch/arm64/include/asm/mman.h      | 44 ++++++++++++++++++++++++++++--
 arch/arm64/include/asm/page.h      |  2 +-
 arch/arm64/include/asm/pgtable.h   |  7 ++++-
 arch/arm64/include/uapi/asm/mman.h |  1 +
 fs/proc/task_mmu.c                 |  4 +++
 include/linux/mm.h                 |  8 ++++++
 7 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 1e0a7826641063..e424fc3a68cb50 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -126,14 +126,18 @@
 
 /*
  * Memory types available.
+ *
+ * IMPORTANT: MT_NORMAL must be index 0 since vm_get_page_prot() may 'or' in
+ *	      the MT_NORMAL_TAGGED memory type for PROT_MTE mappings. Note
+ *	      that protection_map[] only contains MT_NORMAL attributes.
  */
-#define MT_DEVICE_nGnRnE	0
-#define MT_DEVICE_nGnRE		1
-#define MT_DEVICE_GRE		2
-#define MT_NORMAL_NC		3
-#define MT_NORMAL		4
-#define MT_NORMAL_WT		5
-#define MT_NORMAL_TAGGED	6
+#define MT_NORMAL		0
+#define MT_NORMAL_TAGGED	1
+#define MT_NORMAL_NC		2
+#define MT_NORMAL_WT		3
+#define MT_DEVICE_nGnRnE	4
+#define MT_DEVICE_nGnRE		5
+#define MT_DEVICE_GRE		6
 
 /*
  * Memory types for Stage-2 translation
diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index 081ec8de9ea6b8..b01051be7750f1 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -9,16 +9,51 @@
 static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
 	unsigned long pkey __always_unused)
 {
+	unsigned long ret = 0;
+
 	if (system_supports_bti() && (prot & PROT_BTI))
-		return VM_ARM64_BTI;
+		ret |= VM_ARM64_BTI;
 
-	return 0;
+	if (system_supports_mte() && (prot & PROT_MTE))
+		ret |= VM_MTE;
+
+	return ret;
 }
 #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
 
+static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
+{
+	/*
+	 * Only allow MTE on anonymous mappings as these are guaranteed to be
+	 * backed by tags-capable memory. The vm_flags may be overridden by a
+	 * filesystem supporting MTE (RAM-based).
+	 */
+	if (system_supports_mte() && (flags & MAP_ANONYMOUS))
+		return VM_MTE_ALLOWED;
+
+	return 0;
+}
+#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+
 static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
 {
-	return (vm_flags & VM_ARM64_BTI) ? __pgprot(PTE_GP) : __pgprot(0);
+	pteval_t prot = 0;
+
+	if (vm_flags & VM_ARM64_BTI)
+		prot |= PTE_GP;
+
+	/*
+	 * There are two conditions required for returning a Normal Tagged
+	 * memory type: (1) the user requested it via PROT_MTE passed to
+	 * mmap() or mprotect() and (2) the corresponding vma supports MTE. We
+	 * register (1) as VM_MTE in the vma->vm_flags and (2) as
+	 * VM_MTE_ALLOWED. Note that the latter can only be set during the
+	 * mmap() call since mprotect() does not accept MAP_* flags.
+	 */
+	if ((vm_flags & VM_MTE) && (vm_flags & VM_MTE_ALLOWED))
+		prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
+
+	return __pgprot(prot);
 }
 #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
 
@@ -30,6 +65,9 @@ static inline bool arch_validate_prot(unsigned long prot,
 	if (system_supports_bti())
 		supported |= PROT_BTI;
 
+	if (system_supports_mte())
+		supported |= PROT_MTE;
+
 	return (prot & ~supported) == 0;
 }
 #define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr)
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index d918cb1d83a662..012cffc574e890 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -43,7 +43,7 @@ extern int pfn_valid(unsigned long);
 
 #endif /* !__ASSEMBLY__ */
 
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC
+#define VM_DATA_DEFAULT_FLAGS	(VM_DATA_FLAGS_TSK_EXEC | VM_MTE_ALLOWED)
 
 #include <asm-generic/getorder.h>
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0a205a8e91b2e2..057c40b6f5e008 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -681,8 +681,13 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d)
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
+	/*
+	 * Normal and Normal-Tagged are two different memory types and indices
+	 * in MAIR_EL1. The mask below has to include PTE_ATTRINDX_MASK.
+	 */
 	const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |
-			      PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP;
+			      PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP |
+			      PTE_ATTRINDX_MASK;
 	/* preserve the hardware dirty information */
 	if (pte_hw_dirty(pte))
 		pte = pte_mkdirty(pte);
diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h
index 6fdd71eb644f43..1e6482a838e11a 100644
--- a/arch/arm64/include/uapi/asm/mman.h
+++ b/arch/arm64/include/uapi/asm/mman.h
@@ -5,5 +5,6 @@
 #include <asm-generic/mman.h>
 
 #define PROT_BTI	0x10		/* BTI guarded page */
+#define PROT_MTE	0x20		/* Normal Tagged mapping */
 
 #endif /* ! _UAPI__ASM_MMAN_H */
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5066b0251ed836..35172a91148e5e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -653,6 +653,10 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_MERGEABLE)]	= "mg",
 		[ilog2(VM_UFFD_MISSING)]= "um",
 		[ilog2(VM_UFFD_WP)]	= "uw",
+#ifdef CONFIG_ARM64_MTE
+		[ilog2(VM_MTE)]		= "mt",
+		[ilog2(VM_MTE_ALLOWED)]	= "",
+#endif
 #ifdef CONFIG_ARCH_HAS_PKEYS
 		/* These come out via ProtectionKey: */
 		[ilog2(VM_PKEY_BIT0)]	= "",
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ca6e6a81576b93..4312c6c808e91a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -340,6 +340,14 @@ extern unsigned int kobjsize(const void *objp);
 # define VM_MAPPED_COPY	VM_ARCH_1	/* T if mapped copy of data (nommu mmap) */
 #endif
 
+#if defined(CONFIG_ARM64_MTE)
+# define VM_MTE		VM_HIGH_ARCH_0	/* Use Tagged memory for access control */
+# define VM_MTE_ALLOWED	VM_HIGH_ARCH_1	/* Tagged memory permitted */
+#else
+# define VM_MTE		VM_NONE
+# define VM_MTE_ALLOWED	VM_NONE
+#endif
+
 #ifndef VM_GROWSUP
 # define VM_GROWSUP	VM_NONE
 #endif

From c462ac288f2c97e0c1d9ff6a65955317e799f958 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 25 Nov 2019 17:27:06 +0000
Subject: [PATCH 062/262] mm: Introduce arch_validate_flags()

Similarly to arch_validate_prot() called from do_mprotect_pkey(), an
architecture may need to sanity-check the new vm_flags.

Define a dummy function always returning true. In addition to
do_mprotect_pkey(), also invoke it from mmap_region() prior to updating
vma->vm_page_prot to allow the architecture code to veto potentially
inconsistent vm_flags.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mman.h | 13 +++++++++++++
 mm/mmap.c            |  9 +++++++++
 mm/mprotect.c        |  6 ++++++
 3 files changed, 28 insertions(+)

diff --git a/include/linux/mman.h b/include/linux/mman.h
index 6fa15c9b12af3a..629cefc4ecba67 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -108,6 +108,19 @@ static inline bool arch_validate_prot(unsigned long prot, unsigned long addr)
 #define arch_validate_prot arch_validate_prot
 #endif
 
+#ifndef arch_validate_flags
+/*
+ * This is called from mmap() and mprotect() with the updated vma->vm_flags.
+ *
+ * Returns true if the VM_* flags are valid.
+ */
+static inline bool arch_validate_flags(unsigned long flags)
+{
+	return true;
+}
+#define arch_validate_flags arch_validate_flags
+#endif
+
 /*
  * Optimisation macro.  It is equivalent to:
  *      (x & bit1) ? bit2 : 0
diff --git a/mm/mmap.c b/mm/mmap.c
index 40248d84ad5fbd..eed30b096667d1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1812,6 +1812,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		vma_set_anonymous(vma);
 	}
 
+	/* Allow architectures to sanity-check the vm_flags */
+	if (!arch_validate_flags(vma->vm_flags)) {
+		error = -EINVAL;
+		if (file)
+			goto unmap_and_free_vma;
+		else
+			goto free_vma;
+	}
+
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	/* Once vma denies write, undo our temporary denial count */
 	if (file) {
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ce8b8a5eacbb62..56c02beb604141 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -603,6 +603,12 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 			goto out;
 		}
 
+		/* Allow architectures to sanity-check the new flags */
+		if (!arch_validate_flags(newflags)) {
+			error = -EINVAL;
+			goto out;
+		}
+
 		error = security_file_mprotect(vma, reqprot, prot);
 		if (error)
 			goto out;

From 0042090548740921951f31fc0c20dcdb96638cb0 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 9 Aug 2019 15:48:46 +0100
Subject: [PATCH 063/262] arm64: mte: Validate the PROT_MTE request via
 arch_validate_flags()

Make use of the newly introduced arch_validate_flags() hook to
sanity-check the PROT_MTE request passed to mmap() and mprotect(). If
the mapping does not support MTE, these syscalls will return -EINVAL.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mman.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index b01051be7750f1..e3e28f7daf62bd 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -49,8 +49,10 @@ static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
 	 * register (1) as VM_MTE in the vma->vm_flags and (2) as
 	 * VM_MTE_ALLOWED. Note that the latter can only be set during the
 	 * mmap() call since mprotect() does not accept MAP_* flags.
+	 * Checking for VM_MTE only is sufficient since arch_validate_flags()
+	 * does not permit (VM_MTE & !VM_MTE_ALLOWED).
 	 */
-	if ((vm_flags & VM_MTE) && (vm_flags & VM_MTE_ALLOWED))
+	if (vm_flags & VM_MTE)
 		prot |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
 
 	return __pgprot(prot);
@@ -72,4 +74,14 @@ static inline bool arch_validate_prot(unsigned long prot,
 }
 #define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr)
 
+static inline bool arch_validate_flags(unsigned long vm_flags)
+{
+	if (!system_supports_mte())
+		return true;
+
+	/* only allow VM_MTE if VM_MTE_ALLOWED has been set previously */
+	return !(vm_flags & VM_MTE) || (vm_flags & VM_MTE_ALLOWED);
+}
+#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags)
+
 #endif /* ! __ASM_MMAN_H__ */

From 51b0bff2f703f7ecfeb228eaa3d8f6090c18c9c1 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 29 Nov 2019 12:45:08 +0000
Subject: [PATCH 064/262] mm: Allow arm64 mmap(PROT_MTE) on RAM-based files

Since arm64 memory (allocation) tags can only be stored in RAM, mapping
files with PROT_MTE is not allowed by default. RAM-based files like
those in a tmpfs mount or memfd_create() can support memory tagging, so
update the vm_flags accordingly in shmem_mmap().

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/shmem.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/shmem.c b/mm/shmem.c
index 271548ca20f314..ec94f4c7851e44 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2267,6 +2267,9 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 			vma->vm_flags &= ~(VM_MAYWRITE);
 	}
 
+	/* arm64 - allow memory tagging on RAM-based files */
+	vma->vm_flags |= VM_MTE_ALLOWED;
+
 	file_accessed(file);
 	vma->vm_ops = &shmem_vm_ops;
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&

From 1c101da8b971a36695319dce7a24711dc567a0dd Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 27 Nov 2019 10:30:15 +0000
Subject: [PATCH 065/262] arm64: mte: Allow user control of the tag check mode
 via prctl()

By default, even if PROT_MTE is set on a memory range, there is no tag
check fault reporting (SIGSEGV). Introduce a set of option to the
exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
check fault mode:

  PR_MTE_TCF_NONE  - no reporting (default)
  PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
  PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting

These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
context-switched by the kernel. Note that the kernel accesses to the
user address space (e.g. read() system call) are not checked if the user
thread tag checking mode is PR_MTE_TCF_NONE or PR_MTE_TCF_ASYNC. If the
tag checking mode is PR_MTE_TCF_SYNC, the kernel makes a best effort to
check its user address accesses, however it cannot always guarantee it.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte.h       | 14 ++++++
 arch/arm64/include/asm/processor.h |  3 ++
 arch/arm64/kernel/mte.c            | 77 ++++++++++++++++++++++++++++++
 arch/arm64/kernel/process.c        | 26 ++++++++--
 include/uapi/linux/prctl.h         |  6 +++
 5 files changed, 123 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index b2577eee62c225..df2efbc9f8f1a4 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -21,6 +21,9 @@ void mte_clear_page_tags(void *addr);
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void flush_mte_state(void);
+void mte_thread_switch(struct task_struct *next);
+long set_mte_ctrl(unsigned long arg);
+long get_mte_ctrl(void);
 
 #else
 
@@ -36,6 +39,17 @@ static inline void mte_copy_page_tags(void *kto, const void *kfrom)
 static inline void flush_mte_state(void)
 {
 }
+static inline void mte_thread_switch(struct task_struct *next)
+{
+}
+static inline long set_mte_ctrl(unsigned long arg)
+{
+	return 0;
+}
+static inline long get_mte_ctrl(void)
+{
+	return 0;
+}
 
 #endif
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 240fe5e5b72097..80e7f057330993 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -151,6 +151,9 @@ struct thread_struct {
 	struct ptrauth_keys_user	keys_user;
 	struct ptrauth_keys_kernel	keys_kernel;
 #endif
+#ifdef CONFIG_ARM64_MTE
+	u64			sctlr_tcf0;
+#endif
 };
 
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 5f54fd140610a7..375483a1f57329 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -5,6 +5,8 @@
 
 #include <linux/bitops.h>
 #include <linux/mm.h>
+#include <linux/prctl.h>
+#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/thread_info.h>
 
@@ -49,6 +51,26 @@ int memcmp_pages(struct page *page1, struct page *page2)
 	return ret;
 }
 
+static void update_sctlr_el1_tcf0(u64 tcf0)
+{
+	/* ISB required for the kernel uaccess routines */
+	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
+	isb();
+}
+
+static void set_sctlr_el1_tcf0(u64 tcf0)
+{
+	/*
+	 * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
+	 * optimisation. Disable preemption so that it does not see
+	 * the variable update before the SCTLR_EL1.TCF0 one.
+	 */
+	preempt_disable();
+	current->thread.sctlr_tcf0 = tcf0;
+	update_sctlr_el1_tcf0(tcf0);
+	preempt_enable();
+}
+
 void flush_mte_state(void)
 {
 	if (!system_supports_mte())
@@ -58,4 +80,59 @@ void flush_mte_state(void)
 	dsb(ish);
 	write_sysreg_s(0, SYS_TFSRE0_EL1);
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+	/* disable tag checking */
+	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+}
+
+void mte_thread_switch(struct task_struct *next)
+{
+	if (!system_supports_mte())
+		return;
+
+	/* avoid expensive SCTLR_EL1 accesses if no change */
+	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
+		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
+}
+
+long set_mte_ctrl(unsigned long arg)
+{
+	u64 tcf0;
+
+	if (!system_supports_mte())
+		return 0;
+
+	switch (arg & PR_MTE_TCF_MASK) {
+	case PR_MTE_TCF_NONE:
+		tcf0 = SCTLR_EL1_TCF0_NONE;
+		break;
+	case PR_MTE_TCF_SYNC:
+		tcf0 = SCTLR_EL1_TCF0_SYNC;
+		break;
+	case PR_MTE_TCF_ASYNC:
+		tcf0 = SCTLR_EL1_TCF0_ASYNC;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	set_sctlr_el1_tcf0(tcf0);
+
+	return 0;
+}
+
+long get_mte_ctrl(void)
+{
+	if (!system_supports_mte())
+		return 0;
+
+	switch (current->thread.sctlr_tcf0) {
+	case SCTLR_EL1_TCF0_NONE:
+		return PR_MTE_TCF_NONE;
+	case SCTLR_EL1_TCF0_SYNC:
+		return PR_MTE_TCF_SYNC;
+	case SCTLR_EL1_TCF0_ASYNC:
+		return PR_MTE_TCF_ASYNC;
+	}
+
+	return 0;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a49028efab6865..bb759b88d44a80 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -577,6 +577,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	 */
 	dsb(ish);
 
+	/*
+	 * MTE thread switching must happen after the DSB above to ensure that
+	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
+	 * registers.
+	 */
+	mte_thread_switch(next);
+
 	/* the actual thread switch */
 	last = cpu_switch_to(prev, next);
 
@@ -636,9 +643,15 @@ static unsigned int tagged_addr_disabled;
 
 long set_tagged_addr_ctrl(unsigned long arg)
 {
+	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
+
 	if (is_compat_task())
 		return -EINVAL;
-	if (arg & ~PR_TAGGED_ADDR_ENABLE)
+
+	if (system_supports_mte())
+		valid_mask |= PR_MTE_TCF_MASK;
+
+	if (arg & ~valid_mask)
 		return -EINVAL;
 
 	/*
@@ -648,6 +661,9 @@ long set_tagged_addr_ctrl(unsigned long arg)
 	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
 		return -EINVAL;
 
+	if (set_mte_ctrl(arg) != 0)
+		return -EINVAL;
+
 	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
 
 	return 0;
@@ -655,13 +671,17 @@ long set_tagged_addr_ctrl(unsigned long arg)
 
 long get_tagged_addr_ctrl(void)
 {
+	long ret = 0;
+
 	if (is_compat_task())
 		return -EINVAL;
 
 	if (test_thread_flag(TIF_TAGGED_ADDR))
-		return PR_TAGGED_ADDR_ENABLE;
+		ret = PR_TAGGED_ADDR_ENABLE;
 
-	return 0;
+	ret |= get_mte_ctrl();
+
+	return ret;
 }
 
 /*
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e362b..2390ab324afaaa 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -233,6 +233,12 @@ struct prctl_mm_map {
 #define PR_SET_TAGGED_ADDR_CTRL		55
 #define PR_GET_TAGGED_ADDR_CTRL		56
 # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
+/* MTE tag check fault modes */
+# define PR_MTE_TCF_SHIFT		1
+# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57

From af5ce95282dc99d08a27a407a02c763dde1c5558 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 10 Dec 2019 11:19:15 +0000
Subject: [PATCH 066/262] arm64: mte: Allow user control of the generated
 random tags via prctl()

The IRG, ADDG and SUBG instructions insert a random tag in the resulting
address. Certain tags can be excluded via the GCR_EL1.Exclude bitmap
when, for example, the user wants a certain colour for freed buffers.
Since the GCR_EL1 register is not accessible at EL0, extend the
prctl(PR_SET_TAGGED_ADDR_CTRL) interface to include a 16-bit field in
the first argument for controlling which tags can be generated by the
above instruction (an include rather than exclude mask). Note that by
default all non-zero tags are excluded. This setting is per-thread.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/processor.h |  1 +
 arch/arm64/include/asm/sysreg.h    |  7 ++++++
 arch/arm64/kernel/mte.c            | 35 +++++++++++++++++++++++++++---
 arch/arm64/kernel/process.c        |  2 +-
 include/uapi/linux/prctl.h         |  3 +++
 5 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 80e7f057330993..e1b1c2a6086e43 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -153,6 +153,7 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_ARM64_MTE
 	u64			sctlr_tcf0;
+	u64			gcr_user_incl;
 #endif
 };
 
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index daf030a05de09e..52eefe2f7d9579 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -1078,6 +1078,13 @@
 		write_sysreg(__scs_new, sysreg);			\
 } while (0)
 
+#define sysreg_clear_set_s(sysreg, clear, set) do {			\
+	u64 __scs_val = read_sysreg_s(sysreg);				\
+	u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set);		\
+	if (__scs_new != __scs_val)					\
+		write_sysreg_s(__scs_new, sysreg);			\
+} while (0)
+
 #endif
 
 #endif	/* __ASM_SYSREG_H */
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 375483a1f57329..07798b8d503900 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -71,6 +71,25 @@ static void set_sctlr_el1_tcf0(u64 tcf0)
 	preempt_enable();
 }
 
+static void update_gcr_el1_excl(u64 incl)
+{
+	u64 excl = ~incl & SYS_GCR_EL1_EXCL_MASK;
+
+	/*
+	 * Note that 'incl' is an include mask (controlled by the user via
+	 * prctl()) while GCR_EL1 accepts an exclude mask.
+	 * No need for ISB since this only affects EL0 currently, implicit
+	 * with ERET.
+	 */
+	sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl);
+}
+
+static void set_gcr_el1_excl(u64 incl)
+{
+	current->thread.gcr_user_incl = incl;
+	update_gcr_el1_excl(incl);
+}
+
 void flush_mte_state(void)
 {
 	if (!system_supports_mte())
@@ -82,6 +101,8 @@ void flush_mte_state(void)
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
 	/* disable tag checking */
 	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+	/* reset tag generation mask */
+	set_gcr_el1_excl(0);
 }
 
 void mte_thread_switch(struct task_struct *next)
@@ -92,6 +113,7 @@ void mte_thread_switch(struct task_struct *next)
 	/* avoid expensive SCTLR_EL1 accesses if no change */
 	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
 		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
+	update_gcr_el1_excl(next->thread.gcr_user_incl);
 }
 
 long set_mte_ctrl(unsigned long arg)
@@ -116,23 +138,30 @@ long set_mte_ctrl(unsigned long arg)
 	}
 
 	set_sctlr_el1_tcf0(tcf0);
+	set_gcr_el1_excl((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT);
 
 	return 0;
 }
 
 long get_mte_ctrl(void)
 {
+	unsigned long ret;
+
 	if (!system_supports_mte())
 		return 0;
 
+	ret = current->thread.gcr_user_incl << PR_MTE_TAG_SHIFT;
+
 	switch (current->thread.sctlr_tcf0) {
 	case SCTLR_EL1_TCF0_NONE:
 		return PR_MTE_TCF_NONE;
 	case SCTLR_EL1_TCF0_SYNC:
-		return PR_MTE_TCF_SYNC;
+		ret |= PR_MTE_TCF_SYNC;
+		break;
 	case SCTLR_EL1_TCF0_ASYNC:
-		return PR_MTE_TCF_ASYNC;
+		ret |= PR_MTE_TCF_ASYNC;
+		break;
 	}
 
-	return 0;
+	return ret;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index bb759b88d44a80..c80383f30d6a5b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -649,7 +649,7 @@ long set_tagged_addr_ctrl(unsigned long arg)
 		return -EINVAL;
 
 	if (system_supports_mte())
-		valid_mask |= PR_MTE_TCF_MASK;
+		valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
 
 	if (arg & ~valid_mask)
 		return -EINVAL;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 2390ab324afaaa..7f0827705c9a46 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -239,6 +239,9 @@ struct prctl_mm_map {
 # define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
 # define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
 # define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
+/* MTE tag inclusion mask */
+# define PR_MTE_TAG_SHIFT		3
+# define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57

From 39d08e8318c49c5fd2bda9cadfd2bc54d2d3dfd8 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 17 Apr 2020 18:29:35 +0100
Subject: [PATCH 067/262] arm64: mte: Restore the GCR_EL1 register after a
 suspend

The CPU resume/suspend routines only take care of the common system
registers. Restore GCR_EL1 in addition via the __cpu_suspend_exit()
function.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
---
 arch/arm64/include/asm/mte.h | 4 ++++
 arch/arm64/kernel/mte.c      | 8 ++++++++
 arch/arm64/kernel/suspend.c  | 4 ++++
 3 files changed, 16 insertions(+)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index df2efbc9f8f1a4..c93047eff9fea2 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -22,6 +22,7 @@ void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void flush_mte_state(void);
 void mte_thread_switch(struct task_struct *next);
+void mte_suspend_exit(void);
 long set_mte_ctrl(unsigned long arg);
 long get_mte_ctrl(void);
 
@@ -42,6 +43,9 @@ static inline void flush_mte_state(void)
 static inline void mte_thread_switch(struct task_struct *next)
 {
 }
+static inline void mte_suspend_exit(void)
+{
+}
 static inline long set_mte_ctrl(unsigned long arg)
 {
 	return 0;
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 07798b8d503900..09cf76fc1090b5 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -116,6 +116,14 @@ void mte_thread_switch(struct task_struct *next)
 	update_gcr_el1_excl(next->thread.gcr_user_incl);
 }
 
+void mte_suspend_exit(void)
+{
+	if (!system_supports_mte())
+		return;
+
+	update_gcr_el1_excl(current->thread.gcr_user_incl);
+}
+
 long set_mte_ctrl(unsigned long arg)
 {
 	u64 tcf0;
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index c1dee9066ff971..62c239cd60c27c 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -10,6 +10,7 @@
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/exec.h>
+#include <asm/mte.h>
 #include <asm/memory.h>
 #include <asm/mmu_context.h>
 #include <asm/smp_plat.h>
@@ -74,6 +75,9 @@ void notrace __cpu_suspend_exit(void)
 	 */
 	if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
 		arm64_set_ssbd_mitigation(false);
+
+	/* Restore additional MTE-specific configuration */
+	mte_suspend_exit();
 }
 
 /*

From 93f067f6caf5941cc730e99ce72042304e0e6ff5 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 3 Jul 2020 14:25:50 +0100
Subject: [PATCH 068/262] arm64: mte: Allow {set,get}_tagged_addr_ctrl() on
 non-current tasks

In preparation for ptrace() access to the prctl() value, allow calling
these functions on non-current tasks.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte.h       |  8 ++++----
 arch/arm64/include/asm/processor.h |  8 ++++----
 arch/arm64/kernel/mte.c            | 18 ++++++++++++------
 arch/arm64/kernel/process.c        | 18 ++++++++++--------
 4 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index c93047eff9fea2..1a919905295b62 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -23,8 +23,8 @@ void mte_copy_page_tags(void *kto, const void *kfrom);
 void flush_mte_state(void);
 void mte_thread_switch(struct task_struct *next);
 void mte_suspend_exit(void);
-long set_mte_ctrl(unsigned long arg);
-long get_mte_ctrl(void);
+long set_mte_ctrl(struct task_struct *task, unsigned long arg);
+long get_mte_ctrl(struct task_struct *task);
 
 #else
 
@@ -46,11 +46,11 @@ static inline void mte_thread_switch(struct task_struct *next)
 static inline void mte_suspend_exit(void)
 {
 }
-static inline long set_mte_ctrl(unsigned long arg)
+static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
 	return 0;
 }
-static inline long get_mte_ctrl(void)
+static inline long get_mte_ctrl(struct task_struct *task)
 {
 	return 0;
 }
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index e1b1c2a6086e43..fec204d28fceab 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -319,10 +319,10 @@ extern void __init minsigstksz_setup(void);
 
 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
 /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
-long set_tagged_addr_ctrl(unsigned long arg);
-long get_tagged_addr_ctrl(void);
-#define SET_TAGGED_ADDR_CTRL(arg)	set_tagged_addr_ctrl(arg)
-#define GET_TAGGED_ADDR_CTRL()		get_tagged_addr_ctrl()
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg);
+long get_tagged_addr_ctrl(struct task_struct *task);
+#define SET_TAGGED_ADDR_CTRL(arg)	set_tagged_addr_ctrl(current, arg)
+#define GET_TAGGED_ADDR_CTRL()		get_tagged_addr_ctrl(current)
 #endif
 
 /*
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 09cf76fc1090b5..e80c49af74af98 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -124,9 +124,10 @@ void mte_suspend_exit(void)
 	update_gcr_el1_excl(current->thread.gcr_user_incl);
 }
 
-long set_mte_ctrl(unsigned long arg)
+long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
 	u64 tcf0;
+	u64 gcr_incl = (arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT;
 
 	if (!system_supports_mte())
 		return 0;
@@ -145,22 +146,27 @@ long set_mte_ctrl(unsigned long arg)
 		return -EINVAL;
 	}
 
-	set_sctlr_el1_tcf0(tcf0);
-	set_gcr_el1_excl((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT);
+	if (task != current) {
+		task->thread.sctlr_tcf0 = tcf0;
+		task->thread.gcr_user_incl = gcr_incl;
+	} else {
+		set_sctlr_el1_tcf0(tcf0);
+		set_gcr_el1_excl(gcr_incl);
+	}
 
 	return 0;
 }
 
-long get_mte_ctrl(void)
+long get_mte_ctrl(struct task_struct *task)
 {
 	unsigned long ret;
 
 	if (!system_supports_mte())
 		return 0;
 
-	ret = current->thread.gcr_user_incl << PR_MTE_TAG_SHIFT;
+	ret = task->thread.gcr_user_incl << PR_MTE_TAG_SHIFT;
 
-	switch (current->thread.sctlr_tcf0) {
+	switch (task->thread.sctlr_tcf0) {
 	case SCTLR_EL1_TCF0_NONE:
 		return PR_MTE_TCF_NONE;
 	case SCTLR_EL1_TCF0_SYNC:
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c80383f30d6a5b..05a9cdd0b47185 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -641,11 +641,12 @@ void arch_setup_new_exec(void)
  */
 static unsigned int tagged_addr_disabled;
 
-long set_tagged_addr_ctrl(unsigned long arg)
+long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
 {
 	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
+	struct thread_info *ti = task_thread_info(task);
 
-	if (is_compat_task())
+	if (is_compat_thread(ti))
 		return -EINVAL;
 
 	if (system_supports_mte())
@@ -661,25 +662,26 @@ long set_tagged_addr_ctrl(unsigned long arg)
 	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
 		return -EINVAL;
 
-	if (set_mte_ctrl(arg) != 0)
+	if (set_mte_ctrl(task, arg) != 0)
 		return -EINVAL;
 
-	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
+	update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
 
 	return 0;
 }
 
-long get_tagged_addr_ctrl(void)
+long get_tagged_addr_ctrl(struct task_struct *task)
 {
 	long ret = 0;
+	struct thread_info *ti = task_thread_info(task);
 
-	if (is_compat_task())
+	if (is_compat_thread(ti))
 		return -EINVAL;
 
-	if (test_thread_flag(TIF_TAGGED_ADDR))
+	if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR))
 		ret = PR_TAGGED_ADDR_ENABLE;
 
-	ret |= get_mte_ctrl();
+	ret |= get_mte_ctrl(task);
 
 	return ret;
 }

From 18ddbaa02b7a64f4cf3e7e3d4b78b8b70481a17b Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 30 Mar 2020 10:29:38 +0100
Subject: [PATCH 069/262] arm64: mte: ptrace: Add PTRACE_{PEEK,POKE}MTETAGS
 support

Add support for bulk setting/getting of the MTE tags in a tracee's
address space at 'addr' in the ptrace() syscall prototype. 'data' points
to a struct iovec in the tracer's address space with iov_base
representing the address of a tracer's buffer of length iov_len. The
tags to be copied to/from the tracer's buffer are stored as one tag per
byte.

On successfully copying at least one tag, ptrace() returns 0 and updates
the tracer's iov_len with the number of tags copied. In case of error,
either -EIO or -EFAULT is returned, trying to follow the ptrace() man
page.

Note that the tag copying functions are not performance critical,
therefore they lack optimisations found in typical memory copy routines.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Alan Hayward <Alan.Hayward@arm.com>
Cc: Luis Machado <luis.machado@linaro.org>
Cc: Omair Javaid <omair.javaid@linaro.org>
---
 arch/arm64/include/asm/mte.h         |  17 ++++
 arch/arm64/include/uapi/asm/ptrace.h |   3 +
 arch/arm64/kernel/mte.c              | 138 +++++++++++++++++++++++++++
 arch/arm64/kernel/ptrace.c           |   7 ++
 arch/arm64/lib/mte.S                 |  53 ++++++++++
 5 files changed, 218 insertions(+)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 1a919905295b62..7ea0c0e526d12b 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -5,6 +5,11 @@
 #ifndef __ASM_MTE_H
 #define __ASM_MTE_H
 
+#define MTE_GRANULE_SIZE	UL(16)
+#define MTE_GRANULE_MASK	(~(MTE_GRANULE_SIZE - 1))
+#define MTE_TAG_SHIFT		56
+#define MTE_TAG_SIZE		4
+
 #ifndef __ASSEMBLY__
 
 #include <linux/page-flags.h>
@@ -12,6 +17,10 @@
 #include <asm/pgtable-types.h>
 
 void mte_clear_page_tags(void *addr);
+unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
+				      unsigned long n);
+unsigned long mte_copy_tags_to_user(void __user *to, void *from,
+				    unsigned long n);
 
 #ifdef CONFIG_ARM64_MTE
 
@@ -25,6 +34,8 @@ void mte_thread_switch(struct task_struct *next);
 void mte_suspend_exit(void);
 long set_mte_ctrl(struct task_struct *task, unsigned long arg);
 long get_mte_ctrl(struct task_struct *task);
+int mte_ptrace_copy_tags(struct task_struct *child, long request,
+			 unsigned long addr, unsigned long data);
 
 #else
 
@@ -54,6 +65,12 @@ static inline long get_mte_ctrl(struct task_struct *task)
 {
 	return 0;
 }
+static inline int mte_ptrace_copy_tags(struct task_struct *child,
+				       long request, unsigned long addr,
+				       unsigned long data)
+{
+	return -EIO;
+}
 
 #endif
 
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 06413d9f234181..758ae984ff9775 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -76,6 +76,9 @@
 /* syscall emulation path in ptrace */
 #define PTRACE_SYSEMU		  31
 #define PTRACE_SYSEMU_SINGLESTEP  32
+/* MTE allocation tag access */
+#define PTRACE_PEEKMTETAGS	  33
+#define PTRACE_POKEMTETAGS	  34
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index e80c49af74af98..56e79807006c60 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -4,14 +4,18 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/prctl.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/string.h>
 #include <linux/thread_info.h>
+#include <linux/uio.h>
 
 #include <asm/cpufeature.h>
 #include <asm/mte.h>
+#include <asm/ptrace.h>
 #include <asm/sysreg.h>
 
 void mte_sync_tags(pte_t *ptep, pte_t pte)
@@ -179,3 +183,137 @@ long get_mte_ctrl(struct task_struct *task)
 
 	return ret;
 }
+
+/*
+ * Access MTE tags in another process' address space as given in mm. Update
+ * the number of tags copied. Return 0 if any tags copied, error otherwise.
+ * Inspired by __access_remote_vm().
+ */
+static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
+				struct iovec *kiov, unsigned int gup_flags)
+{
+	struct vm_area_struct *vma;
+	void __user *buf = kiov->iov_base;
+	size_t len = kiov->iov_len;
+	int ret;
+	int write = gup_flags & FOLL_WRITE;
+
+	if (!access_ok(buf, len))
+		return -EFAULT;
+
+	if (mmap_read_lock_killable(mm))
+		return -EIO;
+
+	while (len) {
+		unsigned long tags, offset;
+		void *maddr;
+		struct page *page = NULL;
+
+		ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page,
+					    &vma, NULL);
+		if (ret <= 0)
+			break;
+
+		/*
+		 * Only copy tags if the page has been mapped as PROT_MTE
+		 * (PG_mte_tagged set). Otherwise the tags are not valid and
+		 * not accessible to user. Moreover, an mprotect(PROT_MTE)
+		 * would cause the existing tags to be cleared if the page
+		 * was never mapped with PROT_MTE.
+		 */
+		if (!test_bit(PG_mte_tagged, &page->flags)) {
+			ret = -EOPNOTSUPP;
+			put_page(page);
+			break;
+		}
+
+		/* limit access to the end of the page */
+		offset = offset_in_page(addr);
+		tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE);
+
+		maddr = page_address(page);
+		if (write) {
+			tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
+			set_page_dirty_lock(page);
+		} else {
+			tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
+		}
+		put_page(page);
+
+		/* error accessing the tracer's buffer */
+		if (!tags)
+			break;
+
+		len -= tags;
+		buf += tags;
+		addr += tags * MTE_GRANULE_SIZE;
+	}
+	mmap_read_unlock(mm);
+
+	/* return an error if no tags copied */
+	kiov->iov_len = buf - kiov->iov_base;
+	if (!kiov->iov_len) {
+		/* check for error accessing the tracee's address space */
+		if (ret <= 0)
+			return -EIO;
+		else
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy MTE tags in another process' address space at 'addr' to/from tracer's
+ * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
+ */
+static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
+			      struct iovec *kiov, unsigned int gup_flags)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return -EPERM;
+
+	if (!tsk->ptrace || (current != tsk->parent) ||
+	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
+	     !ptracer_capable(tsk, mm->user_ns))) {
+		mmput(mm);
+		return -EPERM;
+	}
+
+	ret = __access_remote_tags(mm, addr, kiov, gup_flags);
+	mmput(mm);
+
+	return ret;
+}
+
+int mte_ptrace_copy_tags(struct task_struct *child, long request,
+			 unsigned long addr, unsigned long data)
+{
+	int ret;
+	struct iovec kiov;
+	struct iovec __user *uiov = (void __user *)data;
+	unsigned int gup_flags = FOLL_FORCE;
+
+	if (!system_supports_mte())
+		return -EIO;
+
+	if (get_user(kiov.iov_base, &uiov->iov_base) ||
+	    get_user(kiov.iov_len, &uiov->iov_len))
+		return -EFAULT;
+
+	if (request == PTRACE_POKEMTETAGS)
+		gup_flags |= FOLL_WRITE;
+
+	/* align addr to the MTE tag granule */
+	addr &= MTE_GRANULE_MASK;
+
+	ret = access_remote_tags(child, addr, &kiov, gup_flags);
+	if (!ret)
+		ret = put_user(kiov.iov_len, &uiov->iov_len);
+
+	return ret;
+}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 8942de814b7233..101040a37d4066 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -34,6 +34,7 @@
 #include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
 #include <asm/fpsimd.h>
+#include <asm/mte.h>
 #include <asm/pointer_auth.h>
 #include <asm/stacktrace.h>
 #include <asm/syscall.h>
@@ -1691,6 +1692,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 long arch_ptrace(struct task_struct *child, long request,
 		 unsigned long addr, unsigned long data)
 {
+	switch (request) {
+	case PTRACE_PEEKMTETAGS:
+	case PTRACE_POKEMTETAGS:
+		return mte_ptrace_copy_tags(child, request, addr, data);
+	}
+
 	return ptrace_request(child, request, addr, data);
 }
 
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 3c3d0edbbca373..434f81d9a180f0 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -4,7 +4,9 @@
  */
 #include <linux/linkage.h>
 
+#include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/mte.h>
 #include <asm/page.h>
 #include <asm/sysreg.h>
 
@@ -51,3 +53,54 @@ SYM_FUNC_START(mte_copy_page_tags)
 	b.ne	1b
 	ret
 SYM_FUNC_END(mte_copy_page_tags)
+
+/*
+ * Read tags from a user buffer (one tag per byte) and set the corresponding
+ * tags at the given kernel address. Used by PTRACE_POKEMTETAGS.
+ *   x0 - kernel address (to)
+ *   x1 - user buffer (from)
+ *   x2 - number of tags/bytes (n)
+ * Returns:
+ *   x0 - number of tags read/set
+ */
+SYM_FUNC_START(mte_copy_tags_from_user)
+	mov	x3, x1
+	cbz	x2, 2f
+1:
+	uao_user_alternative 2f, ldrb, ldtrb, w4, x1, 0
+	lsl	x4, x4, #MTE_TAG_SHIFT
+	stg	x4, [x0], #MTE_GRANULE_SIZE
+	add	x1, x1, #1
+	subs	x2, x2, #1
+	b.ne	1b
+
+	// exception handling and function return
+2:	sub	x0, x1, x3		// update the number of tags set
+	ret
+SYM_FUNC_END(mte_copy_tags_from_user)
+
+/*
+ * Get the tags from a kernel address range and write the tag values to the
+ * given user buffer (one tag per byte). Used by PTRACE_PEEKMTETAGS.
+ *   x0 - user buffer (to)
+ *   x1 - kernel address (from)
+ *   x2 - number of tags/bytes (n)
+ * Returns:
+ *   x0 - number of tags read/set
+ */
+SYM_FUNC_START(mte_copy_tags_to_user)
+	mov	x3, x0
+	cbz	x2, 2f
+1:
+	ldg	x4, [x1]
+	ubfx	x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
+	uao_user_alternative 2f, strb, sttrb, w4, x0, 0
+	add	x0, x0, #1
+	add	x1, x1, #MTE_GRANULE_SIZE
+	subs	x2, x2, #1
+	b.ne	1b
+
+	// exception handling and function return
+2:	sub	x0, x0, x3		// update the number of tags copied
+	ret
+SYM_FUNC_END(mte_copy_tags_to_user)

From 2200aa7154cb7ef76bac93e98326883ba64bfa2e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 3 Jul 2020 15:12:57 +0100
Subject: [PATCH 070/262] arm64: mte: ptrace: Add NT_ARM_TAGGED_ADDR_CTRL
 regset

This regset allows read/write access to a ptraced process
prctl(PR_SET_TAGGED_ADDR_CTRL) setting.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Alan Hayward <Alan.Hayward@arm.com>
Cc: Luis Machado <luis.machado@linaro.org>
Cc: Omair Javaid <omair.javaid@linaro.org>
---
 arch/arm64/kernel/ptrace.c | 42 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/elf.h   |  1 +
 2 files changed, 43 insertions(+)

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 101040a37d4066..f49b349e16a345 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1033,6 +1033,35 @@ static int pac_generic_keys_set(struct task_struct *target,
 #endif /* CONFIG_CHECKPOINT_RESTORE */
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+static int tagged_addr_ctrl_get(struct task_struct *target,
+				const struct user_regset *regset,
+				struct membuf to)
+{
+	long ctrl = get_tagged_addr_ctrl(target);
+
+	if (IS_ERR_VALUE(ctrl))
+		return ctrl;
+
+	return membuf_write(&to, &ctrl, sizeof(ctrl));
+}
+
+static int tagged_addr_ctrl_set(struct task_struct *target, const struct
+				user_regset *regset, unsigned int pos,
+				unsigned int count, const void *kbuf, const
+				void __user *ubuf)
+{
+	int ret;
+	long ctrl;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1);
+	if (ret)
+		return ret;
+
+	return set_tagged_addr_ctrl(target, ctrl);
+}
+#endif
+
 enum aarch64_regset {
 	REGSET_GPR,
 	REGSET_FPR,
@@ -1052,6 +1081,9 @@ enum aarch64_regset {
 	REGSET_PACG_KEYS,
 #endif
 #endif
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+	REGSET_TAGGED_ADDR_CTRL,
+#endif
 };
 
 static const struct user_regset aarch64_regsets[] = {
@@ -1149,6 +1181,16 @@ static const struct user_regset aarch64_regsets[] = {
 	},
 #endif
 #endif
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+	[REGSET_TAGGED_ADDR_CTRL] = {
+		.core_note_type = NT_ARM_TAGGED_ADDR_CTRL,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.regset_get = tagged_addr_ctrl_get,
+		.set = tagged_addr_ctrl_set,
+	},
+#endif
 };
 
 static const struct user_regset_view user_aarch64_view = {
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 22220945a5fdb5..30f68b42eeb53f 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -425,6 +425,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_PAC_MASK		0x406	/* ARM pointer authentication code masks */
 #define NT_ARM_PACA_KEYS	0x407	/* ARM pointer authentication address keys */
 #define NT_ARM_PACG_KEYS	0x408	/* ARM pointer authentication generic key */
+#define NT_ARM_TAGGED_ADDR_CTRL	0x409	/* arm64 tagged address control (prctl()) */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */

From d563d678aa0be06e7bff2953c986f5ff0355f79c Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 1 Jul 2020 17:46:06 +0100
Subject: [PATCH 071/262] fs: Handle intra-page faults in copy_mount_options()

The copy_mount_options() function takes a user pointer argument but no
size and it tries to read up to a PAGE_SIZE. However, copy_from_user()
is not guaranteed to return all the accessible bytes if, for example,
the access crosses a page boundary and gets a fault on the second page.
To work around this, the current copy_mount_options() implementation
performs two copy_from_user() passes, first to the end of the current
page and the second to what's left in the subsequent page.

On arm64 with MTE enabled, access to a user page may trigger a fault
after part of the buffer in a page has been copied (when the user
pointer tag, bits 56-59, no longer matches the allocation tag stored in
memory). Allow copy_mount_options() to handle such intra-page faults by
resorting to byte at a time copy in case of copy_from_user() failure.

Note that copy_from_user() handles the zeroing of the kernel buffer in
case of error.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index bae0e95b3713a3..32a0b9146757e5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3075,7 +3075,7 @@ static void shrink_submounts(struct mount *mnt)
 void *copy_mount_options(const void __user * data)
 {
 	char *copy;
-	unsigned size;
+	unsigned left, offset;
 
 	if (!data)
 		return NULL;
@@ -3084,16 +3084,27 @@ void *copy_mount_options(const void __user * data)
 	if (!copy)
 		return ERR_PTR(-ENOMEM);
 
-	size = PAGE_SIZE - offset_in_page(data);
+	left = copy_from_user(copy, data, PAGE_SIZE);
 
-	if (copy_from_user(copy, data, size)) {
+	/*
+	 * Not all architectures have an exact copy_from_user(). Resort to
+	 * byte at a time.
+	 */
+	offset = PAGE_SIZE - left;
+	while (left) {
+		char c;
+		if (get_user(c, (const char __user *)data + offset))
+			break;
+		copy[offset] = c;
+		left--;
+		offset++;
+	}
+
+	if (left == PAGE_SIZE) {
 		kfree(copy);
 		return ERR_PTR(-EFAULT);
 	}
-	if (size != PAGE_SIZE) {
-		if (copy_from_user(copy + size, data + size, PAGE_SIZE - size))
-			memset(copy + size, 0, PAGE_SIZE - size);
-	}
+
 	return copy;
 }
 

From 8a84802e2a2b1a682761a31c2685506b9f4e1840 Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Wed, 13 May 2020 16:37:49 +0100
Subject: [PATCH 072/262] mm: Add arch hooks for saving/restoring tags

Arm's Memory Tagging Extension (MTE) adds some metadata (tags) to
every physical page, when swapping pages out to disk it is necessary to
save these tags, and later restore them when reading the pages back.

Add some hooks along with dummy implementations to enable the
arch code to handle this.

Three new hooks are added to the swap code:
 * arch_prepare_to_swap() and
 * arch_swap_invalidate_page() / arch_swap_invalidate_area().
One new hook is added to shmem:
 * arch_swap_restore()

Signed-off-by: Steven Price <steven.price@arm.com>
[catalin.marinas@arm.com: add unlock_page() on the error path]
[catalin.marinas@arm.com: dropped the _tags suffix]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 28 ++++++++++++++++++++++++++++
 mm/page_io.c            | 10 ++++++++++
 mm/shmem.c              |  6 ++++++
 mm/swapfile.c           |  2 ++
 4 files changed, 46 insertions(+)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e8cbc2e795d530..dc3b74129fbcce 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -633,6 +633,34 @@ static inline int arch_unmap_one(struct mm_struct *mm,
 }
 #endif
 
+/*
+ * Allow architectures to preserve additional metadata associated with
+ * swapped-out pages. The corresponding __HAVE_ARCH_SWAP_* macros and function
+ * prototypes must be defined in the arch-specific asm/pgtable.h file.
+ */
+#ifndef __HAVE_ARCH_PREPARE_TO_SWAP
+static inline int arch_prepare_to_swap(struct page *page)
+{
+	return 0;
+}
+#endif
+
+#ifndef __HAVE_ARCH_SWAP_INVALIDATE
+static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
+{
+}
+
+static inline void arch_swap_invalidate_area(int type)
+{
+}
+#endif
+
+#ifndef __HAVE_ARCH_SWAP_RESTORE
+static inline void arch_swap_restore(swp_entry_t entry, struct page *page)
+{
+}
+#endif
+
 #ifndef __HAVE_ARCH_PGD_OFFSET_GATE
 #define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
 #endif
diff --git a/mm/page_io.c b/mm/page_io.c
index e485a6e8a6cddb..4ca28aad0d9461 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -252,6 +252,16 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 		unlock_page(page);
 		goto out;
 	}
+	/*
+	 * Arch code may have to preserve more data than just the page
+	 * contents, e.g. memory tags.
+	 */
+	ret = arch_prepare_to_swap(page);
+	if (ret) {
+		set_page_dirty(page);
+		unlock_page(page);
+		goto out;
+	}
 	if (frontswap_store(page) == 0) {
 		set_page_writeback(page);
 		unlock_page(page);
diff --git a/mm/shmem.c b/mm/shmem.c
index ec94f4c7851e44..e57d3314dc4bcf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1734,6 +1734,12 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
 	}
 	wait_on_page_writeback(page);
 
+	/*
+	 * Some architectures may have to restore extra metadata to the
+	 * physical page after reading from swap.
+	 */
+	arch_swap_restore(swap, page);
+
 	if (shmem_should_replace_page(page, gfp)) {
 		error = shmem_replace_page(&page, gfp, info, index);
 		if (error)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 12f59e641b5e29..4b1d1a04e3271e 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -717,6 +717,7 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
 	else
 		swap_slot_free_notify = NULL;
 	while (offset <= end) {
+		arch_swap_invalidate_page(si->type, offset);
 		frontswap_invalidate_page(si->type, offset);
 		if (swap_slot_free_notify)
 			swap_slot_free_notify(si->bdev, offset);
@@ -2682,6 +2683,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	frontswap_map = frontswap_map_get(p);
 	spin_unlock(&p->lock);
 	spin_unlock(&swap_lock);
+	arch_swap_invalidate_area(p->type);
 	frontswap_invalidate_area(p->type);
 	frontswap_map_set(p, NULL);
 	mutex_unlock(&swapon_mutex);

From 36943aba91860269abfba2e736e9534d48e90cae Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Wed, 13 May 2020 16:37:50 +0100
Subject: [PATCH 073/262] arm64: mte: Enable swap of tagged pages

When swapping pages out to disk it is necessary to save any tags that
have been set, and restore when swapping back in. Make use of the new
page flag (PG_ARCH_2, locally named PG_mte_tagged) to identify pages
with tags. When swapping out these pages the tags are stored in memory
and later restored when the pages are brought back in. Because shmem can
swap pages back in without restoring the userspace PTE it is also
necessary to add a hook for shmem.

Signed-off-by: Steven Price <steven.price@arm.com>
[catalin.marinas@arm.com: move function prototypes to mte.h]
[catalin.marinas@arm.com: drop '_tags' from arch_swap_restore_tags()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte.h     |  8 +++
 arch/arm64/include/asm/pgtable.h | 32 ++++++++++++
 arch/arm64/kernel/mte.c          | 19 +++++++-
 arch/arm64/lib/mte.S             | 45 +++++++++++++++++
 arch/arm64/mm/Makefile           |  1 +
 arch/arm64/mm/mteswap.c          | 83 ++++++++++++++++++++++++++++++++
 6 files changed, 187 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/mm/mteswap.c

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 7ea0c0e526d12b..1c99fcadb58ca6 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -21,6 +21,14 @@ unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
 				      unsigned long n);
 unsigned long mte_copy_tags_to_user(void __user *to, void *from,
 				    unsigned long n);
+int mte_save_tags(struct page *page);
+void mte_save_page_tags(const void *page_addr, void *tag_storage);
+bool mte_restore_tags(swp_entry_t entry, struct page *page);
+void mte_restore_page_tags(void *page_addr, const void *tag_storage);
+void mte_invalidate_tags(int type, pgoff_t offset);
+void mte_invalidate_tags_area(int type);
+void *mte_allocate_tag_storage(void);
+void mte_free_tag_storage(char *storage);
 
 #ifdef CONFIG_ARM64_MTE
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 057c40b6f5e008..1c46fcd873f6d4 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -867,6 +867,38 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 
 extern int kern_addr_valid(unsigned long addr);
 
+#ifdef CONFIG_ARM64_MTE
+
+#define __HAVE_ARCH_PREPARE_TO_SWAP
+static inline int arch_prepare_to_swap(struct page *page)
+{
+	if (system_supports_mte())
+		return mte_save_tags(page);
+	return 0;
+}
+
+#define __HAVE_ARCH_SWAP_INVALIDATE
+static inline void arch_swap_invalidate_page(int type, pgoff_t offset)
+{
+	if (system_supports_mte())
+		mte_invalidate_tags(type, offset);
+}
+
+static inline void arch_swap_invalidate_area(int type)
+{
+	if (system_supports_mte())
+		mte_invalidate_tags_area(type);
+}
+
+#define __HAVE_ARCH_SWAP_RESTORE
+static inline void arch_swap_restore(swp_entry_t entry, struct page *page)
+{
+	if (system_supports_mte() && mte_restore_tags(entry, page))
+		set_bit(PG_mte_tagged, &page->flags);
+}
+
+#endif /* CONFIG_ARM64_MTE */
+
 /*
  * On AArch64, the cache coherency is handled via the set_pte_at() function.
  */
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 56e79807006c60..52a0638ed967b1 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -10,6 +10,8 @@
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
 #include <linux/string.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 #include <linux/thread_info.h>
 #include <linux/uio.h>
 
@@ -18,15 +20,30 @@
 #include <asm/ptrace.h>
 #include <asm/sysreg.h>
 
+static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
+{
+	pte_t old_pte = READ_ONCE(*ptep);
+
+	if (check_swap && is_swap_pte(old_pte)) {
+		swp_entry_t entry = pte_to_swp_entry(old_pte);
+
+		if (!non_swap_entry(entry) && mte_restore_tags(entry, page))
+			return;
+	}
+
+	mte_clear_page_tags(page_address(page));
+}
+
 void mte_sync_tags(pte_t *ptep, pte_t pte)
 {
 	struct page *page = pte_page(pte);
 	long i, nr_pages = compound_nr(page);
+	bool check_swap = nr_pages == 1;
 
 	/* if PG_mte_tagged is set, tags have already been initialised */
 	for (i = 0; i < nr_pages; i++, page++) {
 		if (!test_and_set_bit(PG_mte_tagged, &page->flags))
-			mte_clear_page_tags(page_address(page));
+			mte_sync_page_tags(page, ptep, check_swap);
 	}
 }
 
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 434f81d9a180f0..03ca6d8b867069 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -104,3 +104,48 @@ SYM_FUNC_START(mte_copy_tags_to_user)
 2:	sub	x0, x0, x3		// update the number of tags copied
 	ret
 SYM_FUNC_END(mte_copy_tags_to_user)
+
+/*
+ * Save the tags in a page
+ *   x0 - page address
+ *   x1 - tag storage
+ */
+SYM_FUNC_START(mte_save_page_tags)
+	multitag_transfer_size x7, x5
+1:
+	mov	x2, #0
+2:
+	ldgm	x5, [x0]
+	orr	x2, x2, x5
+	add	x0, x0, x7
+	tst	x0, #0xFF		// 16 tag values fit in a register,
+	b.ne	2b			// which is 16*16=256 bytes
+
+	str	x2, [x1], #8
+
+	tst	x0, #(PAGE_SIZE - 1)
+	b.ne	1b
+
+	ret
+SYM_FUNC_END(mte_save_page_tags)
+
+/*
+ * Restore the tags in a page
+ *   x0 - page address
+ *   x1 - tag storage
+ */
+SYM_FUNC_START(mte_restore_page_tags)
+	multitag_transfer_size x7, x5
+1:
+	ldr	x2, [x1], #8
+2:
+	stgm	x2, [x0]
+	add	x0, x0, x7
+	tst	x0, #0xFF
+	b.ne	2b
+
+	tst	x0, #(PAGE_SIZE - 1)
+	b.ne	1b
+
+	ret
+SYM_FUNC_END(mte_restore_page_tags)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index d91030f0ffeeef..5bcc9e0aa2598f 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_PTDUMP_CORE)	+= dump.o
 obj-$(CONFIG_PTDUMP_DEBUGFS)	+= ptdump_debugfs.o
 obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
+obj-$(CONFIG_ARM64_MTE)		+= mteswap.o
 KASAN_SANITIZE_physaddr.o	+= n
 
 obj-$(CONFIG_KASAN)		+= kasan_init.o
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
new file mode 100644
index 00000000000000..c52c1847079c1a
--- /dev/null
+++ b/arch/arm64/mm/mteswap.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/pagemap.h>
+#include <linux/xarray.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <asm/mte.h>
+
+static DEFINE_XARRAY(mte_pages);
+
+void *mte_allocate_tag_storage(void)
+{
+	/* tags granule is 16 bytes, 2 tags stored per byte */
+	return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL);
+}
+
+void mte_free_tag_storage(char *storage)
+{
+	kfree(storage);
+}
+
+int mte_save_tags(struct page *page)
+{
+	void *tag_storage, *ret;
+
+	if (!test_bit(PG_mte_tagged, &page->flags))
+		return 0;
+
+	tag_storage = mte_allocate_tag_storage();
+	if (!tag_storage)
+		return -ENOMEM;
+
+	mte_save_page_tags(page_address(page), tag_storage);
+
+	/* page_private contains the swap entry.val set in do_swap_page */
+	ret = xa_store(&mte_pages, page_private(page), tag_storage, GFP_KERNEL);
+	if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
+		mte_free_tag_storage(tag_storage);
+		return xa_err(ret);
+	} else if (ret) {
+		/* Entry is being replaced, free the old entry */
+		mte_free_tag_storage(ret);
+	}
+
+	return 0;
+}
+
+bool mte_restore_tags(swp_entry_t entry, struct page *page)
+{
+	void *tags = xa_load(&mte_pages, entry.val);
+
+	if (!tags)
+		return false;
+
+	mte_restore_page_tags(page_address(page), tags);
+
+	return true;
+}
+
+void mte_invalidate_tags(int type, pgoff_t offset)
+{
+	swp_entry_t entry = swp_entry(type, offset);
+	void *tags = xa_erase(&mte_pages, entry.val);
+
+	mte_free_tag_storage(tags);
+}
+
+void mte_invalidate_tags_area(int type)
+{
+	swp_entry_t entry = swp_entry(type, 0);
+	swp_entry_t last_entry = swp_entry(type + 1, 0);
+	void *tags;
+
+	XA_STATE(xa_state, &mte_pages, entry.val);
+
+	xa_lock(&mte_pages);
+	xas_for_each(&xa_state, tags, last_entry.val - 1) {
+		__xa_erase(&mte_pages, xa_state.xa_index);
+		mte_free_tag_storage(tags);
+	}
+	xa_unlock(&mte_pages);
+}

From ee11f332af96a3b5d29586bc6d69b41531f62648 Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Wed, 13 May 2020 16:37:51 +0100
Subject: [PATCH 074/262] arm64: mte: Save tags when hibernating

When hibernating the contents of all pages in the system are written to
disk, however the MTE tags are not visible to the generic hibernation
code. So just before the hibernation image is created copy the tags out
of the physical tag storage into standard memory so they will be
included in the hibernation image. After hibernation apply the tags back
into the physical tag storage.

Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/hibernate.c | 118 ++++++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)

diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 68e14152d6e9b0..23467092e24d28 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -31,6 +31,7 @@
 #include <asm/kexec.h>
 #include <asm/memory.h>
 #include <asm/mmu_context.h>
+#include <asm/mte.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/sections.h>
@@ -285,6 +286,117 @@ static int create_safe_exec_page(void *src_start, size_t length,
 
 #define dcache_clean_range(start, end)	__flush_dcache_area(start, (end - start))
 
+#ifdef CONFIG_ARM64_MTE
+
+static DEFINE_XARRAY(mte_pages);
+
+static int save_tags(struct page *page, unsigned long pfn)
+{
+	void *tag_storage, *ret;
+
+	tag_storage = mte_allocate_tag_storage();
+	if (!tag_storage)
+		return -ENOMEM;
+
+	mte_save_page_tags(page_address(page), tag_storage);
+
+	ret = xa_store(&mte_pages, pfn, tag_storage, GFP_KERNEL);
+	if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
+		mte_free_tag_storage(tag_storage);
+		return xa_err(ret);
+	} else if (WARN(ret, "swsusp: %s: Duplicate entry", __func__)) {
+		mte_free_tag_storage(ret);
+	}
+
+	return 0;
+}
+
+static void swsusp_mte_free_storage(void)
+{
+	XA_STATE(xa_state, &mte_pages, 0);
+	void *tags;
+
+	xa_lock(&mte_pages);
+	xas_for_each(&xa_state, tags, ULONG_MAX) {
+		mte_free_tag_storage(tags);
+	}
+	xa_unlock(&mte_pages);
+
+	xa_destroy(&mte_pages);
+}
+
+static int swsusp_mte_save_tags(void)
+{
+	struct zone *zone;
+	unsigned long pfn, max_zone_pfn;
+	int ret = 0;
+	int n = 0;
+
+	if (!system_supports_mte())
+		return 0;
+
+	for_each_populated_zone(zone) {
+		max_zone_pfn = zone_end_pfn(zone);
+		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
+			struct page *page = pfn_to_online_page(pfn);
+
+			if (!page)
+				continue;
+
+			if (!test_bit(PG_mte_tagged, &page->flags))
+				continue;
+
+			ret = save_tags(page, pfn);
+			if (ret) {
+				swsusp_mte_free_storage();
+				goto out;
+			}
+
+			n++;
+		}
+	}
+	pr_info("Saved %d MTE pages\n", n);
+
+out:
+	return ret;
+}
+
+static void swsusp_mte_restore_tags(void)
+{
+	XA_STATE(xa_state, &mte_pages, 0);
+	int n = 0;
+	void *tags;
+
+	xa_lock(&mte_pages);
+	xas_for_each(&xa_state, tags, ULONG_MAX) {
+		unsigned long pfn = xa_state.xa_index;
+		struct page *page = pfn_to_online_page(pfn);
+
+		mte_restore_page_tags(page_address(page), tags);
+
+		mte_free_tag_storage(tags);
+		n++;
+	}
+	xa_unlock(&mte_pages);
+
+	pr_info("Restored %d MTE pages\n", n);
+
+	xa_destroy(&mte_pages);
+}
+
+#else	/* CONFIG_ARM64_MTE */
+
+static int swsusp_mte_save_tags(void)
+{
+	return 0;
+}
+
+static void swsusp_mte_restore_tags(void)
+{
+}
+
+#endif	/* CONFIG_ARM64_MTE */
+
 int swsusp_arch_suspend(void)
 {
 	int ret = 0;
@@ -302,6 +414,10 @@ int swsusp_arch_suspend(void)
 		/* make the crash dump kernel image visible/saveable */
 		crash_prepare_suspend();
 
+		ret = swsusp_mte_save_tags();
+		if (ret)
+			return ret;
+
 		sleep_cpu = smp_processor_id();
 		ret = swsusp_save();
 	} else {
@@ -315,6 +431,8 @@ int swsusp_arch_suspend(void)
 			dcache_clean_range(__hyp_text_start, __hyp_text_end);
 		}
 
+		swsusp_mte_restore_tags();
+
 		/* make the crash dump kernel image protected again */
 		crash_post_resume();
 

From 89b94df9dfb16fe29f9d2085b0a98116dc34d814 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Fri, 6 Sep 2019 11:08:29 +0100
Subject: [PATCH 075/262] arm64: mte: Kconfig entry

Add Memory Tagging Extension support to the arm64 kbuild.

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6d232837cbeee8..e7450fbd0aa7ae 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1664,6 +1664,39 @@ config ARCH_RANDOM
 	  provides a high bandwidth, cryptographically secure
 	  hardware random number generator.
 
+config ARM64_AS_HAS_MTE
+	# Initial support for MTE went in binutils 2.32.0, checked with
+	# ".arch armv8.5-a+memtag" below. However, this was incomplete
+	# as a late addition to the final architecture spec (LDGM/STGM)
+	# is only supported in the newer 2.32.x and 2.33 binutils
+	# versions, hence the extra "stgm" instruction check below.
+	def_bool $(as-instr,.arch armv8.5-a+memtag\nstgm xzr$(comma)[x0])
+
+config ARM64_MTE
+	bool "Memory Tagging Extension support"
+	default y
+	depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI
+	select ARCH_USES_HIGH_VMA_FLAGS
+	help
+	  Memory Tagging (part of the ARMv8.5 Extensions) provides
+	  architectural support for run-time, always-on detection of
+	  various classes of memory error to aid with software debugging
+	  to eliminate vulnerabilities arising from memory-unsafe
+	  languages.
+
+	  This option enables the support for the Memory Tagging
+	  Extension at EL0 (i.e. for userspace).
+
+	  Selecting this option allows the feature to be detected at
+	  runtime. Any secondary CPU not implementing this feature will
+	  not be allowed a late bring-up.
+
+	  Userspace binaries that want to use this feature must
+	  explicitly opt in. The mechanism for the userspace is
+	  described in:
+
+	  Documentation/arm64/memory-tagging-extension.rst.
+
 endmenu
 
 config ARM64_SVE

From df9d7a22dd21c926e8175ccc6e176cb45fc7cb09 Mon Sep 17 00:00:00 2001
From: Vincenzo Frascino <vincenzo.frascino@arm.com>
Date: Fri, 6 Sep 2019 10:52:39 +0100
Subject: [PATCH 076/262] arm64: mte: Add Memory Tagging Extension
 documentation

Memory Tagging Extension (part of the ARMv8.5 Extensions) provides
a mechanism to detect the sources of memory related errors which
may be vulnerable to exploitation, including bounds violations,
use-after-free, use-after-return, use-out-of-scope and use before
initialization errors.

Add Memory Tagging Extension documentation for the arm64 linux
kernel support.

Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
Cc: Will Deacon <will@kernel.org>
---
 Documentation/arm64/cpu-feature-registers.rst |   2 +
 Documentation/arm64/elf_hwcaps.rst            |   4 +
 Documentation/arm64/index.rst                 |   1 +
 .../arm64/memory-tagging-extension.rst        | 305 ++++++++++++++++++
 4 files changed, 312 insertions(+)
 create mode 100644 Documentation/arm64/memory-tagging-extension.rst

diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst
index f28853f80089bf..328e0c454fbd46 100644
--- a/Documentation/arm64/cpu-feature-registers.rst
+++ b/Documentation/arm64/cpu-feature-registers.rst
@@ -175,6 +175,8 @@ infrastructure:
      +------------------------------+---------+---------+
      | Name                         |  bits   | visible |
      +------------------------------+---------+---------+
+     | MTE                          | [11-8]  |    y    |
+     +------------------------------+---------+---------+
      | SSBS                         | [7-4]   |    y    |
      +------------------------------+---------+---------+
      | BT                           | [3-0]   |    y    |
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index 84a9fd2d41b470..bbd9cf54db6c70 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -240,6 +240,10 @@ HWCAP2_BTI
 
     Functionality implied by ID_AA64PFR0_EL1.BT == 0b0001.
 
+HWCAP2_MTE
+
+    Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described
+    by Documentation/arm64/memory-tagging-extension.rst.
 
 4. Unused AT_HWCAP bits
 -----------------------
diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst
index d9665d83c53ace..43b0939d384e9c 100644
--- a/Documentation/arm64/index.rst
+++ b/Documentation/arm64/index.rst
@@ -14,6 +14,7 @@ ARM64 Architecture
     hugetlbpage
     legacy_instructions
     memory
+    memory-tagging-extension
     perf
     pointer-authentication
     silicon-errata
diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
new file mode 100644
index 00000000000000..e3709b536b89b4
--- /dev/null
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -0,0 +1,305 @@
+===============================================
+Memory Tagging Extension (MTE) in AArch64 Linux
+===============================================
+
+Authors: Vincenzo Frascino <vincenzo.frascino@arm.com>
+         Catalin Marinas <catalin.marinas@arm.com>
+
+Date: 2020-02-25
+
+This document describes the provision of the Memory Tagging Extension
+functionality in AArch64 Linux.
+
+Introduction
+============
+
+ARMv8.5 based processors introduce the Memory Tagging Extension (MTE)
+feature. MTE is built on top of the ARMv8.0 virtual address tagging TBI
+(Top Byte Ignore) feature and allows software to access a 4-bit
+allocation tag for each 16-byte granule in the physical address space.
+Such memory range must be mapped with the Normal-Tagged memory
+attribute. A logical tag is derived from bits 59-56 of the virtual
+address used for the memory access. A CPU with MTE enabled will compare
+the logical tag against the allocation tag and potentially raise an
+exception on mismatch, subject to system registers configuration.
+
+Userspace Support
+=================
+
+When ``CONFIG_ARM64_MTE`` is selected and Memory Tagging Extension is
+supported by the hardware, the kernel advertises the feature to
+userspace via ``HWCAP2_MTE``.
+
+PROT_MTE
+--------
+
+To access the allocation tags, a user process must enable the Tagged
+memory attribute on an address range using a new ``prot`` flag for
+``mmap()`` and ``mprotect()``:
+
+``PROT_MTE`` - Pages allow access to the MTE allocation tags.
+
+The allocation tag is set to 0 when such pages are first mapped in the
+user address space and preserved on copy-on-write. ``MAP_SHARED`` is
+supported and the allocation tags can be shared between processes.
+
+**Note**: ``PROT_MTE`` is only supported on ``MAP_ANONYMOUS`` and
+RAM-based file mappings (``tmpfs``, ``memfd``). Passing it to other
+types of mapping will result in ``-EINVAL`` returned by these system
+calls.
+
+**Note**: The ``PROT_MTE`` flag (and corresponding memory type) cannot
+be cleared by ``mprotect()``.
+
+**Note**: ``madvise()`` memory ranges with ``MADV_DONTNEED`` and
+``MADV_FREE`` may have the allocation tags cleared (set to 0) at any
+point after the system call.
+
+Tag Check Faults
+----------------
+
+When ``PROT_MTE`` is enabled on an address range and a mismatch between
+the logical and allocation tags occurs on access, there are three
+configurable behaviours:
+
+- *Ignore* - This is the default mode. The CPU (and kernel) ignores the
+  tag check fault.
+
+- *Synchronous* - The kernel raises a ``SIGSEGV`` synchronously, with
+  ``.si_code = SEGV_MTESERR`` and ``.si_addr = <fault-address>``. The
+  memory access is not performed. If ``SIGSEGV`` is ignored or blocked
+  by the offending thread, the containing process is terminated with a
+  ``coredump``.
+
+- *Asynchronous* - The kernel raises a ``SIGSEGV``, in the offending
+  thread, asynchronously following one or multiple tag check faults,
+  with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
+  address is unknown).
+
+The user can select the above modes, per thread, using the
+``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where
+``flags`` contain one of the following values in the ``PR_MTE_TCF_MASK``
+bit-field:
+
+- ``PR_MTE_TCF_NONE``  - *Ignore* tag check faults
+- ``PR_MTE_TCF_SYNC``  - *Synchronous* tag check fault mode
+- ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode
+
+The current tag check fault mode can be read using the
+``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.
+
+Tag checking can also be disabled for a user thread by setting the
+``PSTATE.TCO`` bit with ``MSR TCO, #1``.
+
+**Note**: Signal handlers are always invoked with ``PSTATE.TCO = 0``,
+irrespective of the interrupted context. ``PSTATE.TCO`` is restored on
+``sigreturn()``.
+
+**Note**: There are no *match-all* logical tags available for user
+applications.
+
+**Note**: Kernel accesses to the user address space (e.g. ``read()``
+system call) are not checked if the user thread tag checking mode is
+``PR_MTE_TCF_NONE`` or ``PR_MTE_TCF_ASYNC``. If the tag checking mode is
+``PR_MTE_TCF_SYNC``, the kernel makes a best effort to check its user
+address accesses, however it cannot always guarantee it.
+
+Excluding Tags in the ``IRG``, ``ADDG`` and ``SUBG`` instructions
+-----------------------------------------------------------------
+
+The architecture allows excluding certain tags to be randomly generated
+via the ``GCR_EL1.Exclude`` register bit-field. By default, Linux
+excludes all tags other than 0. A user thread can enable specific tags
+in the randomly generated set using the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
+flags, 0, 0, 0)`` system call where ``flags`` contains the tags bitmap
+in the ``PR_MTE_TAG_MASK`` bit-field.
+
+**Note**: The hardware uses an exclude mask but the ``prctl()``
+interface provides an include mask. An include mask of ``0`` (exclusion
+mask ``0xffff``) results in the CPU always generating tag ``0``.
+
+Initial process state
+---------------------
+
+On ``execve()``, the new process has the following configuration:
+
+- ``PR_TAGGED_ADDR_ENABLE`` set to 0 (disabled)
+- Tag checking mode set to ``PR_MTE_TCF_NONE``
+- ``PR_MTE_TAG_MASK`` set to 0 (all tags excluded)
+- ``PSTATE.TCO`` set to 0
+- ``PROT_MTE`` not set on any of the initial memory maps
+
+On ``fork()``, the new process inherits the parent's configuration and
+memory map attributes with the exception of the ``madvise()`` ranges
+with ``MADV_WIPEONFORK`` which will have the data and tags cleared (set
+to 0).
+
+The ``ptrace()`` interface
+--------------------------
+
+``PTRACE_PEEKMTETAGS`` and ``PTRACE_POKEMTETAGS`` allow a tracer to read
+the tags from or set the tags to a tracee's address space. The
+``ptrace()`` system call is invoked as ``ptrace(request, pid, addr,
+data)`` where:
+
+- ``request`` - one of ``PTRACE_PEEKMTETAGS`` or ``PTRACE_PEEKMTETAGS``.
+- ``pid`` - the tracee's PID.
+- ``addr`` - address in the tracee's address space.
+- ``data`` - pointer to a ``struct iovec`` where ``iov_base`` points to
+  a buffer of ``iov_len`` length in the tracer's address space.
+
+The tags in the tracer's ``iov_base`` buffer are represented as one
+4-bit tag per byte and correspond to a 16-byte MTE tag granule in the
+tracee's address space.
+
+**Note**: If ``addr`` is not aligned to a 16-byte granule, the kernel
+will use the corresponding aligned address.
+
+``ptrace()`` return value:
+
+- 0 - tags were copied, the tracer's ``iov_len`` was updated to the
+  number of tags transferred. This may be smaller than the requested
+  ``iov_len`` if the requested address range in the tracee's or the
+  tracer's space cannot be accessed or does not have valid tags.
+- ``-EPERM`` - the specified process cannot be traced.
+- ``-EIO`` - the tracee's address range cannot be accessed (e.g. invalid
+  address) and no tags copied. ``iov_len`` not updated.
+- ``-EFAULT`` - fault on accessing the tracer's memory (``struct iovec``
+  or ``iov_base`` buffer) and no tags copied. ``iov_len`` not updated.
+- ``-EOPNOTSUPP`` - the tracee's address does not have valid tags (never
+  mapped with the ``PROT_MTE`` flag). ``iov_len`` not updated.
+
+**Note**: There are no transient errors for the requests above, so user
+programs should not retry in case of a non-zero system call return.
+
+``PTRACE_GETREGSET`` and ``PTRACE_SETREGSET`` with ``addr ==
+``NT_ARM_TAGGED_ADDR_CTRL`` allow ``ptrace()`` access to the tagged
+address ABI control and MTE configuration of a process as per the
+``prctl()`` options described in
+Documentation/arm64/tagged-address-abi.rst and above. The corresponding
+``regset`` is 1 element of 8 bytes (``sizeof(long))``).
+
+Example of correct usage
+========================
+
+*MTE Example code*
+
+.. code-block:: c
+
+    /*
+     * To be compiled with -march=armv8.5-a+memtag
+     */
+    #include <errno.h>
+    #include <stdint.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <unistd.h>
+    #include <sys/auxv.h>
+    #include <sys/mman.h>
+    #include <sys/prctl.h>
+
+    /*
+     * From arch/arm64/include/uapi/asm/hwcap.h
+     */
+    #define HWCAP2_MTE              (1 << 18)
+
+    /*
+     * From arch/arm64/include/uapi/asm/mman.h
+     */
+    #define PROT_MTE                 0x20
+
+    /*
+     * From include/uapi/linux/prctl.h
+     */
+    #define PR_SET_TAGGED_ADDR_CTRL 55
+    #define PR_GET_TAGGED_ADDR_CTRL 56
+    # define PR_TAGGED_ADDR_ENABLE  (1UL << 0)
+    # define PR_MTE_TCF_SHIFT       1
+    # define PR_MTE_TCF_NONE        (0UL << PR_MTE_TCF_SHIFT)
+    # define PR_MTE_TCF_SYNC        (1UL << PR_MTE_TCF_SHIFT)
+    # define PR_MTE_TCF_ASYNC       (2UL << PR_MTE_TCF_SHIFT)
+    # define PR_MTE_TCF_MASK        (3UL << PR_MTE_TCF_SHIFT)
+    # define PR_MTE_TAG_SHIFT       3
+    # define PR_MTE_TAG_MASK        (0xffffUL << PR_MTE_TAG_SHIFT)
+
+    /*
+     * Insert a random logical tag into the given pointer.
+     */
+    #define insert_random_tag(ptr) ({                       \
+            uint64_t __val;                                 \
+            asm("irg %0, %1" : "=r" (__val) : "r" (ptr));   \
+            __val;                                          \
+    })
+
+    /*
+     * Set the allocation tag on the destination address.
+     */
+    #define set_tag(tagged_addr) do {                                      \
+            asm volatile("stg %0, [%0]" : : "r" (tagged_addr) : "memory"); \
+    } while (0)
+
+    int main()
+    {
+            unsigned char *a;
+            unsigned long page_sz = sysconf(_SC_PAGESIZE);
+            unsigned long hwcap2 = getauxval(AT_HWCAP2);
+
+            /* check if MTE is present */
+            if (!(hwcap2 & HWCAP2_MTE))
+                    return EXIT_FAILURE;
+
+            /*
+             * Enable the tagged address ABI, synchronous MTE tag check faults and
+             * allow all non-zero tags in the randomly generated set.
+             */
+            if (prctl(PR_SET_TAGGED_ADDR_CTRL,
+                      PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC | (0xfffe << PR_MTE_TAG_SHIFT),
+                      0, 0, 0)) {
+                    perror("prctl() failed");
+                    return EXIT_FAILURE;
+            }
+
+            a = mmap(0, page_sz, PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+            if (a == MAP_FAILED) {
+                    perror("mmap() failed");
+                    return EXIT_FAILURE;
+            }
+
+            /*
+             * Enable MTE on the above anonymous mmap. The flag could be passed
+             * directly to mmap() and skip this step.
+             */
+            if (mprotect(a, page_sz, PROT_READ | PROT_WRITE | PROT_MTE)) {
+                    perror("mprotect() failed");
+                    return EXIT_FAILURE;
+            }
+
+            /* access with the default tag (0) */
+            a[0] = 1;
+            a[1] = 2;
+
+            printf("a[0] = %hhu a[1] = %hhu\n", a[0], a[1]);
+
+            /* set the logical and allocation tags */
+            a = (unsigned char *)insert_random_tag(a);
+            set_tag(a);
+
+            printf("%p\n", a);
+
+            /* non-zero tag access */
+            a[0] = 3;
+            printf("a[0] = %hhu a[1] = %hhu\n", a[0], a[1]);
+
+            /*
+             * If MTE is enabled correctly the next instruction will generate an
+             * exception.
+             */
+            printf("Expecting SIGSEGV...\n");
+            a[16] = 0xdd;
+
+            /* this should not be printed in the PR_MTE_TCF_SYNC mode */
+            printf("...haven't got one\n");
+
+            return EXIT_FAILURE;
+    }

From a0bc32b3cacf194dc479b342f006203fd1e1941a Mon Sep 17 00:00:00 2001
From: Akshay Gupta <Akshay.Gupta@amd.com>
Date: Fri, 28 Aug 2020 19:24:12 +0000
Subject: [PATCH 077/262] x86/mce: Increase maximum number of banks to 64

...because future AMD systems will support up to 64 MCA banks per CPU.

MAX_NR_BANKS is used to allocate a number of data structures, and it is
used as a ceiling for values read from MCG_CAP[Count]. Therefore, this
change will have no functional effect on existing systems with 32 or
fewer MCA banks per CPU.

However, this will increase the size of the following structures:

Global bitmaps:
- core.c / mce_banks_ce_disabled
- core.c / all_banks
- core.c / valid_banks
- core.c / toclear
- Total: 32 new bits * 4 bitmaps = 16 new bytes

Per-CPU bitmaps:
- core.c / mce_poll_banks
- intel.c / mce_banks_owned
- Total: 32 new bits * 2 bitmaps = 8 new bytes

The bitmaps are arrays of longs. So this change will only affect 32-bit
execution, since there will be one additional long used. There will be
no additional memory use on 64-bit execution, because the size of long
is 64 bits.

Global structs:
- amd.c / struct smca_bank smca_banks[]: 16 bytes per bank
- core.c / struct mce_bank_dev mce_bank_devs[]: 56 bytes per bank
- Total: 32 new banks * (16 + 56) bytes = 2304 new bytes

Per-CPU structs:
- core.c / struct mce_bank mce_banks_array[]: 16 bytes per bank
- Total: 32 new banks * 16 bytes = 512 new bytes

32-bit
Total global size increase: 2320 bytes
Total per-CPU size increase: 520 bytes

64-bit
Total global size increase: 2304 bytes
Total per-CPU size increase: 512 bytes

This additional memory should still fit within the existing .data
section of the kernel binary. However, in the case where it doesn't
fit, an additional page (4kB) of memory will be added to the binary to
accommodate the extra data which will be the maximum size increase of
vmlinux.

Signed-off-by: Akshay Gupta <Akshay.Gupta@amd.com>
[ Adjust commit message and code comment. ]
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200828192412.320052-1-Yazen.Ghannam@amd.com
---
 arch/x86/include/asm/mce.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 6adced6e7dd357..109af5c7f51546 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -200,12 +200,8 @@ void mce_setup(struct mce *m);
 void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct device *, mce_device);
 
-/*
- * Maximum banks number.
- * This is the limit of the current register layout on
- * Intel CPUs.
- */
-#define MAX_NR_BANKS 32
+/* Maximum number of MCA banks per CPU. */
+#define MAX_NR_BANKS 64
 
 #ifdef CONFIG_X86_MCE_INTEL
 void mce_intel_feature_init(struct cpuinfo_x86 *c);

From 93921baa3f6ff77e57d7e772165aa7bd709b5387 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 10 Aug 2020 08:55:08 +0100
Subject: [PATCH 078/262] x86/resctrl: Fix spelling in user-visible warning
 messages

Fix spelling mistake "Could't" -> "Couldn't" in user-visible warning
messages.

 [ bp: Massage commit message; s/cpu/CPU/g ]

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200810075508.46490-1-colin.king@canonical.com
---
 arch/x86/kernel/cpu/resctrl/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 6a9df71c1b9eae..9cceee61256976 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -562,7 +562,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
 
 	d = rdt_find_domain(r, id, &add_pos);
 	if (IS_ERR(d)) {
-		pr_warn("Could't find cache id for cpu %d\n", cpu);
+		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
 		return;
 	}
 
@@ -607,7 +607,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
 
 	d = rdt_find_domain(r, id, NULL);
 	if (IS_ERR_OR_NULL(d)) {
-		pr_warn("Could't find cache id for cpu %d\n", cpu);
+		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
 		return;
 	}
 

From 62148d98590191f0515e89e7a6eed47bc5c51ae8 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 26 Aug 2020 14:29:23 +0200
Subject: [PATCH 079/262] m68k: Revive _TIF_* masks

While the core m68k code does not use the _TIF_* masks anymore, there
exists generic code that relies on their presence.  Fortunately none of
that code is used on m68k, currently.

Re-add the various _TIF_* masks, which were removed in commit
cddafa3500fde4a0 ("m68k/m68knommu: merge MMU and non-MMU
thread_info.h"), to avoid future nasty surprises.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Greg Ungerer <gerg@linux-m68k.org>
Link: https://lore.kernel.org/r/20200826122923.22821-1-geert@linux-m68k.org
---
 arch/m68k/include/asm/thread_info.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index 015f1ca383053a..3689c6718c883d 100644
--- a/arch/m68k/include/asm/thread_info.h
+++ b/arch/m68k/include/asm/thread_info.h
@@ -68,4 +68,12 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		16	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	18	/* restore signal mask in do_signal */
 
+#define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+#define _TIF_DELAYED_TRACE	(1 << TIF_DELAYED_TRACE)
+#define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+#define _TIF_MEMDIE		(1 << TIF_MEMDIE)
+#define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
+
 #endif	/* _ASM_M68K_THREAD_INFO_H */

From d473de0f883db0801c78a86b34dd1fddf8ba2ca3 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 26 Aug 2020 14:51:24 +0200
Subject: [PATCH 080/262] m68k: amiga: Clean up Amiga hardware configuration

Move the generic Amiga hardware configuration section out of the
switch statement, which allows to replace all ugly jumps by break
statements.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20200826125124.23863-1-geert@linux-m68k.org
---
 arch/m68k/amiga/config.c | 118 +++++++++++++++++++--------------------
 1 file changed, 58 insertions(+), 60 deletions(-)

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 9c1afa143d0596..bee9f240f35dee 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -214,7 +214,7 @@ static void __init amiga_identify(void)
 
 	switch (amiga_model) {
 	case AMI_UNKNOWN:
-		goto Generic;
+		break;
 
 	case AMI_600:
 	case AMI_1200:
@@ -227,7 +227,7 @@ static void __init amiga_identify(void)
 	case AMI_2000:
 	case AMI_2500:
 		AMIGAHW_SET(A2000_CLK);	/* Is this correct for all models? */
-		goto Generic;
+		break;
 
 	case AMI_3000:
 	case AMI_3000T:
@@ -238,7 +238,7 @@ static void __init amiga_identify(void)
 		AMIGAHW_SET(A3000_SCSI);
 		AMIGAHW_SET(A3000_CLK);
 		AMIGAHW_SET(ZORRO3);
-		goto Generic;
+		break;
 
 	case AMI_4000T:
 		AMIGAHW_SET(A4000_SCSI);
@@ -247,68 +247,12 @@ static void __init amiga_identify(void)
 		AMIGAHW_SET(A4000_IDE);
 		AMIGAHW_SET(A3000_CLK);
 		AMIGAHW_SET(ZORRO3);
-		goto Generic;
+		break;
 
 	case AMI_CDTV:
 	case AMI_CD32:
 		AMIGAHW_SET(CD_ROM);
 		AMIGAHW_SET(A2000_CLK);             /* Is this correct? */
-		goto Generic;
-
-	Generic:
-		AMIGAHW_SET(AMI_VIDEO);
-		AMIGAHW_SET(AMI_BLITTER);
-		AMIGAHW_SET(AMI_AUDIO);
-		AMIGAHW_SET(AMI_FLOPPY);
-		AMIGAHW_SET(AMI_KEYBOARD);
-		AMIGAHW_SET(AMI_MOUSE);
-		AMIGAHW_SET(AMI_SERIAL);
-		AMIGAHW_SET(AMI_PARALLEL);
-		AMIGAHW_SET(CHIP_RAM);
-		AMIGAHW_SET(PAULA);
-
-		switch (amiga_chipset) {
-		case CS_OCS:
-		case CS_ECS:
-		case CS_AGA:
-			switch (amiga_custom.deniseid & 0xf) {
-			case 0x0c:
-				AMIGAHW_SET(DENISE_HR);
-				break;
-			case 0x08:
-				AMIGAHW_SET(LISA);
-				break;
-			default:
-				AMIGAHW_SET(DENISE);
-				break;
-			}
-			break;
-		}
-		switch ((amiga_custom.vposr>>8) & 0x7f) {
-		case 0x00:
-			AMIGAHW_SET(AGNUS_PAL);
-			break;
-		case 0x10:
-			AMIGAHW_SET(AGNUS_NTSC);
-			break;
-		case 0x20:
-		case 0x21:
-			AMIGAHW_SET(AGNUS_HR_PAL);
-			break;
-		case 0x30:
-		case 0x31:
-			AMIGAHW_SET(AGNUS_HR_NTSC);
-			break;
-		case 0x22:
-		case 0x23:
-			AMIGAHW_SET(ALICE_PAL);
-			break;
-		case 0x32:
-		case 0x33:
-			AMIGAHW_SET(ALICE_NTSC);
-			break;
-		}
-		AMIGAHW_SET(ZORRO);
 		break;
 
 	case AMI_DRACO:
@@ -318,6 +262,60 @@ static void __init amiga_identify(void)
 		panic("Unknown Amiga Model");
 	}
 
+	AMIGAHW_SET(AMI_VIDEO);
+	AMIGAHW_SET(AMI_BLITTER);
+	AMIGAHW_SET(AMI_AUDIO);
+	AMIGAHW_SET(AMI_FLOPPY);
+	AMIGAHW_SET(AMI_KEYBOARD);
+	AMIGAHW_SET(AMI_MOUSE);
+	AMIGAHW_SET(AMI_SERIAL);
+	AMIGAHW_SET(AMI_PARALLEL);
+	AMIGAHW_SET(CHIP_RAM);
+	AMIGAHW_SET(PAULA);
+
+	switch (amiga_chipset) {
+	case CS_OCS:
+	case CS_ECS:
+	case CS_AGA:
+		switch (amiga_custom.deniseid & 0xf) {
+		case 0x0c:
+			AMIGAHW_SET(DENISE_HR);
+			break;
+		case 0x08:
+			AMIGAHW_SET(LISA);
+			break;
+		default:
+			AMIGAHW_SET(DENISE);
+			break;
+		}
+		break;
+	}
+	switch ((amiga_custom.vposr>>8) & 0x7f) {
+	case 0x00:
+		AMIGAHW_SET(AGNUS_PAL);
+		break;
+	case 0x10:
+		AMIGAHW_SET(AGNUS_NTSC);
+		break;
+	case 0x20:
+	case 0x21:
+		AMIGAHW_SET(AGNUS_HR_PAL);
+		break;
+	case 0x30:
+	case 0x31:
+		AMIGAHW_SET(AGNUS_HR_NTSC);
+		break;
+	case 0x22:
+	case 0x23:
+		AMIGAHW_SET(ALICE_PAL);
+		break;
+	case 0x32:
+	case 0x33:
+		AMIGAHW_SET(ALICE_NTSC);
+		break;
+	}
+	AMIGAHW_SET(ZORRO);
+
 #define AMIGAHW_ANNOUNCE(name, str)		\
 	if (AMIGAHW_PRESENT(name))		\
 		pr_cont(str)

From dc072012bc94287e3cc43e1429c5d72b2e6f4972 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 26 Aug 2020 14:52:59 +0200
Subject: [PATCH 081/262] m68k: Sort selects in main Kconfig

Sort the list of select statements in the main Kconfig file for the m68k
architecture, to avoid conflicts when modifying this list.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20200826125259.24069-1-geert@linux-m68k.org
---
 arch/m68k/Kconfig | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 6f2f38d05772ab..93bbb74ea876d6 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -6,32 +6,32 @@ config M68K
 	select ARCH_HAS_BINFMT_FLAT
 	select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
 	select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
 	select ARCH_NO_PREEMPT if !COLDFIRE
+	select ARCH_WANT_IPC_PARSE_VERSION
 	select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
 	select DMA_DIRECT_REMAP if HAS_DMA && MMU && !COLDFIRE
-	select HAVE_IDE
-	select HAVE_AOUT if MMU
-	select HAVE_ASM_MODVERSIONS
-	select HAVE_DEBUG_BUGVERBOSE
-	select GENERIC_IRQ_SHOW
 	select GENERIC_ATOMIC64
-	select NO_DMA if !MMU && !COLDFIRE
-	select HAVE_UID16
-	select VIRT_TO_BUS
-	select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
 	select GENERIC_CPU_DEVICES
 	select GENERIC_IOMAP
+	select GENERIC_IRQ_SHOW
 	select GENERIC_STRNCPY_FROM_USER if MMU
 	select GENERIC_STRNLEN_USER if MMU
-	select ARCH_WANT_IPC_PARSE_VERSION
+	select HAVE_AOUT if MMU
+	select HAVE_ASM_MODVERSIONS
+	select HAVE_DEBUG_BUGVERBOSE
 	select HAVE_FUTEX_CMPXCHG if MMU && FUTEX
+	select HAVE_IDE
 	select HAVE_MOD_ARCH_SPECIFIC
+	select HAVE_UID16
+	select MMU_GATHER_NO_RANGE if MMU
 	select MODULES_USE_ELF_REL
 	select MODULES_USE_ELF_RELA
-	select OLD_SIGSUSPEND3
+	select NO_DMA if !MMU && !COLDFIRE
 	select OLD_SIGACTION
-	select MMU_GATHER_NO_RANGE if MMU
+	select OLD_SIGSUSPEND3
+	select VIRT_TO_BUS
 
 config CPU_BIG_ENDIAN
 	def_bool y

From 41f1bf37a63ecdb06b7b96646579170ddbe9def1 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 26 Aug 2020 15:01:03 +0200
Subject: [PATCH 082/262] m68k: mm: Use PAGE_ALIGNED() helper

Use the existing PAGE_ALIGNED() helper instead of open-coding the same
operation.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20200826130103.25381-1-geert@linux-m68k.org
---
 arch/m68k/mm/motorola.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 2bb006bdc31cd6..a9bdde54ca3501 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -226,7 +226,7 @@ static pte_t * __init kernel_page_table(void)
 {
 	pte_t *pte_table = last_pte_table;
 
-	if (((unsigned long)last_pte_table & ~PAGE_MASK) == 0) {
+	if (PAGE_ALIGNED(last_pte_table)) {
 		pte_table = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 		if (!pte_table) {
 			panic("%s: Failed to allocate %lu bytes align=%lx\n",
@@ -274,7 +274,7 @@ static pmd_t * __init kernel_ptr_table(void)
 	}
 
 	last_pmd_table += PTRS_PER_PMD;
-	if (((unsigned long)last_pmd_table & ~PAGE_MASK) == 0) {
+	if (PAGE_ALIGNED(last_pmd_table)) {
 		last_pmd_table = (pmd_t *)memblock_alloc_low(PAGE_SIZE,
 							   PAGE_SIZE);
 		if (!last_pmd_table)

From 7e158826564fbbb7284dabbaf71b6c3227845f51 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 26 Aug 2020 15:04:44 +0200
Subject: [PATCH 083/262] m68k: mm: Remove superfluous memblock_alloc*() casts

The return type of memblock_alloc*() is a void pointer, so there is no
need to cast it to "void *" or some other pointer type, before assigning
it to a pointer variable.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Greg Ungerer <gerg@linux-m68k.org>
Link: https://lore.kernel.org/r/20200826130444.25618-1-geert@linux-m68k.org
---
 arch/m68k/mm/mcfmmu.c   | 2 +-
 arch/m68k/mm/motorola.c | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 2b9cb4a6228113..eac9dde6519344 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -42,7 +42,7 @@ void __init paging_init(void)
 	unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 };
 	int i;
 
-	empty_zero_page = (void *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+	empty_zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	if (!empty_zero_page)
 		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 		      __func__, PAGE_SIZE, PAGE_SIZE);
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index a9bdde54ca3501..3a653f0a4188d4 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -227,7 +227,7 @@ static pte_t * __init kernel_page_table(void)
 	pte_t *pte_table = last_pte_table;
 
 	if (PAGE_ALIGNED(last_pte_table)) {
-		pte_table = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
+		pte_table = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 		if (!pte_table) {
 			panic("%s: Failed to allocate %lu bytes align=%lx\n",
 					__func__, PAGE_SIZE, PAGE_SIZE);
@@ -275,8 +275,7 @@ static pmd_t * __init kernel_ptr_table(void)
 
 	last_pmd_table += PTRS_PER_PMD;
 	if (PAGE_ALIGNED(last_pmd_table)) {
-		last_pmd_table = (pmd_t *)memblock_alloc_low(PAGE_SIZE,
-							   PAGE_SIZE);
+		last_pmd_table = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 		if (!last_pmd_table)
 			panic("%s: Failed to allocate %lu bytes align=%lx\n",
 			      __func__, PAGE_SIZE, PAGE_SIZE);

From 352e04291115af2a74c271e0cd3643a632af6b7a Mon Sep 17 00:00:00 2001
From: "Alexander A. Klimov" <grandmaster@al2klimov.de>
Date: Wed, 26 Aug 2020 20:52:12 +0200
Subject: [PATCH 084/262] m68k: Replace HTTP links with HTTPS ones

Rationale:
Reduces attack surface on kernel devs opening the links for MITM
as HTTPS traffic is much harder to manipulate.

Deterministic algorithm:
For each file:
  If not .svg:
    For each line:
      If doesn't contain `\bxmlns\b`:
        For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`:
	  If neither `\bgnu\.org/license`, nor `\bmozilla\.org/MPL\b`:
            If both the HTTP and HTTPS versions
            return 200 OK and serve the same content:
              Replace HTTP with HTTPS.

Signed-off-by: Alexander A. Klimov <grandmaster@al2klimov.de>
Link: https://lore.kernel.org/r/20200826185212.3139-1-grandmaster@al2klimov.de
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/mac/macboing.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/m68k/mac/macboing.c b/arch/m68k/mac/macboing.c
index 388780797f7d22..4de6229c7bfdeb 100644
--- a/arch/m68k/mac/macboing.c
+++ b/arch/m68k/mac/macboing.c
@@ -116,7 +116,7 @@ static void mac_init_asc( void )
 			 *   support 16-bit stereo output, but only mono input."
 			 *
 			 *   Technical Information Library (TIL) article number 16405. 
-			 *   http://support.apple.com/kb/TA32601 
+			 *   https://support.apple.com/kb/TA32601
 			 *
 			 * --David Kilzer
 			 */

From b4c971245925db1a6e0898878ad410f8f17ec59a Mon Sep 17 00:00:00 2001
From: Yue Hu <huyue2@yulong.com>
Date: Tue, 4 Aug 2020 16:53:47 +0800
Subject: [PATCH 085/262] arm64: traps: Add str of description to panic() in
 die()

Currently, there are different description strings in die() such as
die("Oops",,), die("Oops - BUG",,). And panic() called by die() will
always show "Fatal exception" or "Fatal exception in interrupt".

Note that panic() will run any panic handler via panic_notifier_list.
And the string above will be formatted and placed in static buf[]
which will be passed to handler.

So panic handler can not distinguish which Oops it is from the buf if
we want to do some things like reserve the string in memory or panic
statistics. It's not benefit to debug. We need to add more codes to
troubleshoot. Let's utilize existing resource to make debug much simpler.

Signed-off-by: Yue Hu <huyue2@yulong.com>
Link: https://lore.kernel.org/r/20200804085347.10720-1-zbestahu@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/traps.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 13ebd5ca20706a..af25cedf54e95e 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -200,9 +200,9 @@ void die(const char *str, struct pt_regs *regs, int err)
 	oops_exit();
 
 	if (in_interrupt())
-		panic("Fatal exception in interrupt");
+		panic("%s: Fatal exception in interrupt", str);
 	if (panic_on_oops)
-		panic("Fatal exception");
+		panic("%s: Fatal exception", str);
 
 	raw_spin_unlock_irqrestore(&die_lock, flags);
 

From ffdbd3d83553beb0fda00081293d5640cba477a2 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Tue, 11 Aug 2020 13:35:05 +0800
Subject: [PATCH 086/262] arm64: perf: Add general hardware LLC events for
 PMUv3

This patch is to add the general hardware last level cache (LLC) events
for PMUv3: one event is for LLC access and another is for LLC miss.

With this change, perf tool can support last level cache profiling,
below is an example to demonstrate the usage on Arm64:

  $ perf stat -e LLC-load-misses -e LLC-loads -- \
	  perf bench mem memcpy -s 1024MB -l 100 -f default

  [...]

    Performance counter stats for 'perf bench mem memcpy -s 1024MB -l 100 -f default':

        35,534,262      LLC-load-misses           #    2.16% of all LL-cache hits
     1,643,946,443      LLC-loads

  [...]

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Link: https://lore.kernel.org/r/20200811053505.21223-1-leo.yan@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/perf_event.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 462f9a9cc44be1..86e2328b0c2ec6 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -69,6 +69,9 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
 	[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB,
 
+	[C(LL)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD,
+	[C(LL)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_LL_CACHE_RD,
+
 	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
 	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 };

From d51eb416fa111c1129c46ea3320faab36bf4c99c Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Fri, 4 Sep 2020 10:21:37 +0800
Subject: [PATCH 087/262] drivers/perf: hisi: Add missing include of
 linux/module.h

MODULE_*** is used in HiSilicon uncore PMU drivers and is provided by
linux/module.h, but the header file is not directly included. Add the
missing include.

Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/1599186097-18599-1-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/hisilicon/hisi_uncore_pmu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index 25b0c97b3eb0a5..b59ec22169ab6c 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -14,6 +14,7 @@
 #include <linux/cpumask.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
 

From ad14c19242b5a5f87aa86c4c930e668121de2809 Mon Sep 17 00:00:00 2001
From: Xiaoming Ni <nixiaoming@huawei.com>
Date: Fri, 28 Aug 2020 11:18:22 +0800
Subject: [PATCH 088/262] arm64: fix some spelling mistakes in the comments by
 codespell

arch/arm64/include/asm/cpu_ops.h:24: necesary ==> necessary
arch/arm64/include/asm/kvm_arm.h:69: maintainance ==> maintenance
arch/arm64/include/asm/cpufeature.h:361: capabilties ==> capabilities
arch/arm64/kernel/perf_regs.c:19: compatability ==> compatibility
arch/arm64/kernel/smp_spin_table.c:86: endianess ==> endianness
arch/arm64/kernel/smp_spin_table.c:88: endianess ==> endianness
arch/arm64/kvm/vgic/vgic-mmio-v3.c:1004: targetting ==> targeting
arch/arm64/kvm/vgic/vgic-mmio-v3.c:1005: targetting ==> targeting

Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
Link: https://lore.kernel.org/r/20200828031822.35928-1-nixiaoming@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpu_ops.h    | 2 +-
 arch/arm64/include/asm/cpufeature.h | 2 +-
 arch/arm64/include/asm/kvm_arm.h    | 2 +-
 arch/arm64/kernel/perf_regs.c       | 2 +-
 arch/arm64/kernel/smp_spin_table.c  | 4 ++--
 arch/arm64/kvm/vgic/vgic-mmio-v3.c  | 4 ++--
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index d28e8f37d3b402..e95c4df839115d 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -21,7 +21,7 @@
  *		mechanism for doing so, tests whether it is possible to boot
  *		the given CPU.
  * @cpu_boot:	Boots a cpu into the kernel.
- * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary
+ * @cpu_postboot: Optionally, perform any post-boot cleanup or necessary
  *		synchronisation. Called from the cpu being booted.
  * @cpu_can_disable: Determines whether a CPU can be disabled based on
  *		mechanism-specific information.
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 89b4f0142c2878..3a42dc8e697cb5 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -358,7 +358,7 @@ static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
 }
 
 /*
- * Generic helper for handling capabilties with multiple (match,enable) pairs
+ * Generic helper for handling capabilities with multiple (match,enable) pairs
  * of call backs, sharing the same capability bit.
  * Iterate over each entry to see if at least one matches.
  */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 1da8e3dc445551..32acf95d49c7ea 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -66,7 +66,7 @@
  * TWI:		Trap WFI
  * TIDCP:	Trap L2CTLR/L2ECTLR
  * BSU_IS:	Upgrade barriers to the inner shareable domain
- * FB:		Force broadcast of all maintainance operations
+ * FB:		Force broadcast of all maintenance operations
  * AMO:		Override CPSR.A and enable signaling with VA
  * IMO:		Override CPSR.I and enable signaling with VI
  * FMO:		Override CPSR.F and enable signaling with VF
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
index 666b225aeb3add..94e8718e722900 100644
--- a/arch/arm64/kernel/perf_regs.c
+++ b/arch/arm64/kernel/perf_regs.c
@@ -16,7 +16,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
 
 	/*
 	 * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but
-	 * we're stuck with it for ABI compatability reasons.
+	 * we're stuck with it for ABI compatibility reasons.
 	 *
 	 * For a 32-bit consumer inspecting a 32-bit task, then it will look at
 	 * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h).
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index c8a3fee00c1132..5892e79fa4294e 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -83,9 +83,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
 
 	/*
 	 * We write the release address as LE regardless of the native
-	 * endianess of the kernel. Therefore, any boot-loaders that
+	 * endianness of the kernel. Therefore, any boot-loaders that
 	 * read this address need to convert this address to the
-	 * boot-loader's endianess before jumping. This is mandated by
+	 * boot-loader's endianness before jumping. This is mandated by
 	 * the boot protocol.
 	 */
 	writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr);
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 5c786b915cd34c..52d6f24f65dcf2 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -1001,8 +1001,8 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
 		raw_spin_lock_irqsave(&irq->irq_lock, flags);
 
 		/*
-		 * An access targetting Group0 SGIs can only generate
-		 * those, while an access targetting Group1 SGIs can
+		 * An access targeting Group0 SGIs can only generate
+		 * those, while an access targeting Group1 SGIs can
 		 * generate interrupts of either group.
 		 */
 		if (!irq->group || allow_group1) {

From 9a747c91e8d69d7283c850031d2462f4d27bf25b Mon Sep 17 00:00:00 2001
From: Yanfei Xu <yanfei.xu@windriver.com>
Date: Tue, 1 Sep 2020 17:11:54 +0800
Subject: [PATCH 089/262] arm64/numa: Fix a typo in comment of arm64_numa_init

Fix a typo in comment of arm64_numa_init. 'encomapssing' should
be 'encompassing'.

Signed-off-by: Yanfei Xu <yanfei.xu@windriver.com>
Link: https://lore.kernel.org/r/20200901091154.10112-1-yanfei.xu@windriver.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/numa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 73f8b49d485c2c..d402da26cdca56 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -448,7 +448,7 @@ static int __init dummy_numa_init(void)
  * arm64_numa_init() - Initialize NUMA
  *
  * Try each configured NUMA initialization method until one succeeds. The
- * last fallback is dummy single node config encomapssing whole memory.
+ * last fallback is dummy single node config encompassing whole memory.
  */
 void __init arm64_numa_init(void)
 {

From 1ab64cf81489e871bcb94fb2dea35c7fac561fff Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Tue, 18 Aug 2020 14:36:24 +0800
Subject: [PATCH 090/262] ACPI/IORT: Drop the unused @ops of
 iort_add_device_replay()

Since commit d2e1a003af56 ("ACPI/IORT: Don't call iommu_ops->add_device
directly"), we use the IOMMU core API to replace a direct invoke of the
specified callback. The parameter @ops has therefore became unused. Let's
drop it.

Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Hanjun Guo <guohanjun@huawei.com>
Link: https://lore.kernel.org/r/20200818063625.980-2-yuzenghui@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/acpi/arm64/iort.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index ec782e4a0fe419..a0ece0e201b2fb 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -811,8 +811,7 @@ static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
 	return (fwspec && fwspec->ops) ? fwspec->ops : NULL;
 }
 
-static inline int iort_add_device_replay(const struct iommu_ops *ops,
-					 struct device *dev)
+static inline int iort_add_device_replay(struct device *dev)
 {
 	int err = 0;
 
@@ -1072,7 +1071,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
 	 */
 	if (!err) {
 		ops = iort_fwspec_iommu_ops(dev);
-		err = iort_add_device_replay(ops, dev);
+		err = iort_add_device_replay(dev);
 	}
 
 	/* Ignore all other errors apart from EPROBE_DEFER */
@@ -1089,8 +1088,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
 #else
 static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
 { return NULL; }
-static inline int iort_add_device_replay(const struct iommu_ops *ops,
-					 struct device *dev)
+static inline int iort_add_device_replay(struct device *dev)
 { return 0; }
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
 { return 0; }

From c2bea7a1a1c0c4c5c970696ff556a73f55f998eb Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Tue, 18 Aug 2020 14:36:25 +0800
Subject: [PATCH 091/262] ACPI/IORT: Remove the unused inline functions

Since commit 8212688600ed ("ACPI/IORT: Fix build error when IOMMU_SUPPORT
is disabled"), iort_fwspec_iommu_ops() and iort_add_device_replay() are not
needed anymore when CONFIG_IOMMU_API is not selected. Let's remove them.

Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Hanjun Guo <guohanjun@huawei.com>
Link: https://lore.kernel.org/r/20200818063625.980-3-yuzenghui@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/acpi/arm64/iort.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index a0ece0e201b2fb..e670785a620146 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1086,10 +1086,6 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
 }
 
 #else
-static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
-{ return NULL; }
-static inline int iort_add_device_replay(struct device *dev)
-{ return 0; }
 int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
 { return 0; }
 const struct iommu_ops *iort_iommu_configure_id(struct device *dev,

From 120dc60d0bdbadcad7460222f74c9ed15cdeb73e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 25 Aug 2020 15:54:40 +0200
Subject: [PATCH 092/262] arm64: get rid of TEXT_OFFSET

TEXT_OFFSET serves no purpose, and for this reason, it was redefined
as 0x0 in the v5.8 timeframe. Since this does not appear to have caused
any issues that require us to revisit that decision, let's get rid of the
macro entirely, along with any references to it.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20200825135440.11288-1-ardb@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Makefile                       |  6 ------
 arch/arm64/include/asm/boot.h             |  3 +--
 arch/arm64/include/asm/kernel-pgtable.h   |  2 +-
 arch/arm64/include/asm/memory.h           |  2 +-
 arch/arm64/kernel/Makefile                |  2 --
 arch/arm64/kernel/head.S                  | 16 ++++++----------
 arch/arm64/kernel/image.h                 |  1 -
 arch/arm64/kernel/vmlinux.lds.S           |  4 ++--
 drivers/firmware/efi/libstub/Makefile     |  1 -
 drivers/firmware/efi/libstub/arm64-stub.c |  6 +++---
 10 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 130569f90c54ae..0fd4c1be4f645e 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -11,7 +11,6 @@
 # Copyright (C) 1995-2001 by Russell King
 
 LDFLAGS_vmlinux	:=--no-undefined -X
-CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 ifeq ($(CONFIG_RELOCATABLE), y)
 # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
@@ -132,9 +131,6 @@ endif
 # Default value
 head-y		:= arch/arm64/kernel/head.o
 
-# The byte offset of the kernel image in RAM from the start of RAM.
-TEXT_OFFSET := 0x0
-
 ifeq ($(CONFIG_KASAN_SW_TAGS), y)
 KASAN_SHADOW_SCALE_SHIFT := 4
 else
@@ -145,8 +141,6 @@ KBUILD_CFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT)
 KBUILD_CPPFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT)
 KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT)
 
-export	TEXT_OFFSET
-
 core-y		+= arch/arm64/
 libs-y		:= arch/arm64/lib/ $(libs-y)
 libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h
index c7f67da13cd99a..3e7943fd17a445 100644
--- a/arch/arm64/include/asm/boot.h
+++ b/arch/arm64/include/asm/boot.h
@@ -13,8 +13,7 @@
 #define MAX_FDT_SIZE		SZ_2M
 
 /*
- * arm64 requires the kernel image to placed
- * TEXT_OFFSET bytes beyond a 2 MB aligned base
+ * arm64 requires the kernel image to placed at a 2 MB aligned base address
  */
 #define MIN_KIMG_ALIGN		SZ_2M
 
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 329fb15f6bac33..19ca76ea60d98a 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -86,7 +86,7 @@
 			+ EARLY_PGDS((vstart), (vend)) 	/* each PGDIR needs a next level page table */	\
 			+ EARLY_PUDS((vstart), (vend))	/* each PUD needs a next level page table */	\
 			+ EARLY_PMDS((vstart), (vend)))	/* each PMD needs a next level page table */
-#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end))
+#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end))
 #define IDMAP_DIR_SIZE		(IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index afa722504bfde9..20a9b322d342c6 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -169,7 +169,7 @@ extern s64			memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
 #define PHYS_OFFSET		({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
 
-/* the virtual base of the kernel image (minus TEXT_OFFSET) */
+/* the virtual base of the kernel image */
 extern u64			kimage_vaddr;
 
 /* the offset between the kernel virtual and physical mappings */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index a561cbb91d4dc5..4197ef2fb22eb5 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -3,8 +3,6 @@
 # Makefile for the linux kernel.
 #
 
-CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
-AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 CFLAGS_armv8_deprecated.o := -I$(src)
 
 CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 037421c66b147c..d8d9caf02834e0 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -36,14 +36,10 @@
 
 #include "efi-header.S"
 
-#define __PHYS_OFFSET	(KERNEL_START - TEXT_OFFSET)
+#define __PHYS_OFFSET	KERNEL_START
 
-#if (TEXT_OFFSET & 0xfff) != 0
-#error TEXT_OFFSET must be at least 4KB aligned
-#elif (PAGE_OFFSET & 0x1fffff) != 0
+#if (PAGE_OFFSET & 0x1fffff) != 0
 #error PAGE_OFFSET must be at least 2MB aligned
-#elif TEXT_OFFSET > 0x1fffff
-#error TEXT_OFFSET must be less than 2MB
 #endif
 
 /*
@@ -55,7 +51,7 @@
  *   x0 = physical address to the FDT blob.
  *
  * This code is mostly position independent so you call this at
- * __pa(PAGE_OFFSET + TEXT_OFFSET).
+ * __pa(PAGE_OFFSET).
  *
  * Note that the callee-saved registers are used for storing variables
  * that are useful before the MMU is enabled. The allocations are described
@@ -77,7 +73,7 @@ _head:
 	b	primary_entry			// branch to kernel start, magic
 	.long	0				// reserved
 #endif
-	le64sym	_kernel_offset_le		// Image load offset from start of RAM, little-endian
+	.quad	0				// Image load offset from start of RAM, little-endian
 	le64sym	_kernel_size_le			// Effective size of kernel image, little-endian
 	le64sym	_kernel_flags_le		// Informative flags, little-endian
 	.quad	0				// reserved
@@ -382,7 +378,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 	 * Map the kernel image (starting with PHYS_OFFSET).
 	 */
 	adrp	x0, init_pg_dir
-	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
+	mov_q	x5, KIMAGE_VADDR		// compile time __va(_text)
 	add	x5, x5, x23			// add KASLR displacement
 	mov	x4, PTRS_PER_PGD
 	adrp	x6, _end			// runtime __pa(_end)
@@ -474,7 +470,7 @@ SYM_FUNC_END(__primary_switched)
 
 	.pushsection ".rodata", "a"
 SYM_DATA_START(kimage_vaddr)
-	.quad		_text - TEXT_OFFSET
+	.quad		_text
 SYM_DATA_END(kimage_vaddr)
 EXPORT_SYMBOL(kimage_vaddr)
 	.popsection
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index c7d38c660372c4..7bc3ba89790191 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -62,7 +62,6 @@
  */
 #define HEAD_SYMBOLS						\
 	DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text);	\
-	DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET);	\
 	DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS);
 
 #endif /* __ARM64_KERNEL_IMAGE_H */
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 7cba7623fcec75..82801d98a2b70d 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -105,7 +105,7 @@ SECTIONS
 		*(.eh_frame)
 	}
 
-	. = KIMAGE_VADDR + TEXT_OFFSET;
+	. = KIMAGE_VADDR;
 
 	.head.text : {
 		_text = .;
@@ -274,4 +274,4 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
 /*
  * If padding is applied before .head.text, virt<->phys conversions will fail.
  */
-ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned")
+ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned")
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 296b18fbd7a2d6..5a80cf6bd60666 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -64,7 +64,6 @@ lib-$(CONFIG_ARM)		+= arm32-stub.o
 lib-$(CONFIG_ARM64)		+= arm64-stub.o
 lib-$(CONFIG_X86)		+= x86-stub.o
 CFLAGS_arm32-stub.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
-CFLAGS_arm64-stub.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 #
 # For x86, bootloaders like systemd-boot or grub-efi do not zero-initialize the
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index e5bfac79e5ac97..fc178398f1acd2 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -77,7 +77,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
 
 	kernel_size = _edata - _text;
 	kernel_memsize = kernel_size + (_end - _edata);
-	*reserve_size = kernel_memsize + TEXT_OFFSET % min_kimg_align();
+	*reserve_size = kernel_memsize;
 
 	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) {
 		/*
@@ -91,7 +91,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
 	}
 
 	if (status != EFI_SUCCESS) {
-		if (IS_ALIGNED((u64)_text - TEXT_OFFSET, min_kimg_align())) {
+		if (IS_ALIGNED((u64)_text, min_kimg_align())) {
 			/*
 			 * Just execute from wherever we were loaded by the
 			 * UEFI PE/COFF loader if the alignment is suitable.
@@ -111,7 +111,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
 		}
 	}
 
-	*image_addr = *reserve_addr + TEXT_OFFSET % min_kimg_align();
+	*image_addr = *reserve_addr;
 	memcpy((void *)*image_addr, _text, kernel_size);
 
 	return EFI_SUCCESS;

From 44fdf4ed2693eb05dbfa83beaa6fe5fbd0c2f6d0 Mon Sep 17 00:00:00 2001
From: Qi Liu <liuqi115@huawei.com>
Date: Fri, 4 Sep 2020 17:57:38 +0800
Subject: [PATCH 093/262] arm64: perf: Remove unnecessary event_idx check

event_idx is obtained from armv8pmu_get_event_idx(), and this idx must be
between ARMV8_IDX_CYCLE_COUNTER and cpu_pmu->num_events. So it's unnecessary
to do this check. Let's remove it.

Signed-off-by: Qi Liu <liuqi115@huawei.com>
Link: https://lore.kernel.org/r/1599213458-28394-1-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/perf_event.c | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 86e2328b0c2ec6..04d0ceb72a67f3 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -310,8 +310,6 @@ static struct attribute_group armv8_pmuv3_format_attr_group = {
  */
 #define	ARMV8_IDX_CYCLE_COUNTER	0
 #define	ARMV8_IDX_COUNTER0	1
-#define	ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
-	(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
 
 
 /*
@@ -368,12 +366,6 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr)
 	return pmovsr & ARMV8_PMU_OVERFLOWED_MASK;
 }
 
-static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx)
-{
-	return idx >= ARMV8_IDX_CYCLE_COUNTER &&
-		idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu);
-}
-
 static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
 {
 	return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
@@ -443,15 +435,11 @@ static u64 armv8pmu_unbias_long_counter(struct perf_event *event, u64 value)
 
 static u64 armv8pmu_read_counter(struct perf_event *event)
 {
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 	u64 value = 0;
 
-	if (!armv8pmu_counter_valid(cpu_pmu, idx))
-		pr_err("CPU%u reading wrong counter %d\n",
-			smp_processor_id(), idx);
-	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
+	if (idx == ARMV8_IDX_CYCLE_COUNTER)
 		value = read_sysreg(pmccntr_el0);
 	else
 		value = armv8pmu_read_hw_counter(event);
@@ -480,16 +468,12 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event,
 
 static void armv8pmu_write_counter(struct perf_event *event, u64 value)
 {
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
 	value = armv8pmu_bias_long_counter(event, value);
 
-	if (!armv8pmu_counter_valid(cpu_pmu, idx))
-		pr_err("CPU%u writing wrong counter %d\n",
-			smp_processor_id(), idx);
-	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
+	if (idx == ARMV8_IDX_CYCLE_COUNTER)
 		write_sysreg(value, pmccntr_el0);
 	else
 		armv8pmu_write_hw_counter(event, value);

From c048ddf86cdd066db283ce838a0049d2bbefb9e5 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Fri, 4 Sep 2020 14:00:59 +0530
Subject: [PATCH 094/262] arm64/mm/ptdump: Add address markers for BPF regions

Kernel virtual region [BPF_JIT_REGION_START..BPF_JIT_REGION_END] is missing
from address_markers[], hence relevant page table entries are not displayed
with /sys/kernel/debug/kernel_page_tables. This adds those missing markers.
While here, also rename arch/arm64/mm/dump.c which sounds bit ambiguous, as
arch/arm64/mm/ptdump.c instead.

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Steven Price <steven.price@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Link: https://lore.kernel.org/r/1599208259-11191-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/Makefile             | 2 +-
 arch/arm64/mm/{dump.c => ptdump.c} | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)
 rename arch/arm64/mm/{dump.c => ptdump.c} (99%)

diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index d91030f0ffeeef..2a1d275cd4d75b 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,7 +4,7 @@ obj-y				:= dma-mapping.o extable.o fault.o init.o \
 				   ioremap.o mmap.o pgd.o mmu.o \
 				   context.o proc.o pageattr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
-obj-$(CONFIG_PTDUMP_CORE)	+= dump.o
+obj-$(CONFIG_PTDUMP_CORE)	+= ptdump.o
 obj-$(CONFIG_PTDUMP_DEBUGFS)	+= ptdump_debugfs.o
 obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/ptdump.c
similarity index 99%
rename from arch/arm64/mm/dump.c
rename to arch/arm64/mm/ptdump.c
index 0b8da1cc1c07e8..265284dc942df3 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -41,6 +41,8 @@ static struct addr_marker address_markers[] = {
 	{ 0 /* KASAN_SHADOW_START */,	"Kasan shadow start" },
 	{ KASAN_SHADOW_END,		"Kasan shadow end" },
 #endif
+	{ BPF_JIT_REGION_START,		"BPF start" },
+	{ BPF_JIT_REGION_END,		"BPF end" },
 	{ MODULES_VADDR,		"Modules start" },
 	{ MODULES_END,			"Modules end" },
 	{ VMALLOC_START,		"vmalloc() area" },

From 3102bc0e6ac752cc5df896acb557d779af4d82a1 Mon Sep 17 00:00:00 2001
From: Valentin Schneider <valentin.schneider@arm.com>
Date: Sat, 29 Aug 2020 14:00:16 +0100
Subject: [PATCH 095/262] arm64: topology: Stop using MPIDR for topology
 information

In the absence of ACPI or DT topology data, we fallback to haphazardly
decoding *something* out of MPIDR. Sadly, the contents of that register are
mostly unusable due to the implementation leniancy and things like Aff0
having to be capped to 15 (despite being encoded on 8 bits).

Consider a simple system with a single package of 32 cores, all under the
same LLC. We ought to be shoving them in the same core_sibling mask, but
MPIDR is going to look like:

  | CPU  | 0 | ... | 15 | 16 | ... | 31 |
  |------+---+-----+----+----+-----+----+
  | Aff0 | 0 | ... | 15 |  0 | ... | 15 |
  | Aff1 | 0 | ... |  0 |  1 | ... |  1 |
  | Aff2 | 0 | ... |  0 |  0 | ... |  0 |

Which will eventually yield

  core_sibling(0-15)  == 0-15
  core_sibling(16-31) == 16-31

NUMA woes
=========

If we try to play games with this and set up NUMA boundaries within those
groups of 16 cores via e.g. QEMU:

  # Node0: 0-9; Node1: 10-19
  $ qemu-system-aarch64 <blah> \
    -smp 20 -numa node,cpus=0-9,nodeid=0 -numa node,cpus=10-19,nodeid=1

The scheduler's MC domain (all CPUs with same LLC) is going to be built via

  arch_topology.c::cpu_coregroup_mask()

In there we try to figure out a sensible mask out of the topology
information we have. In short, here we'll pick the smallest of NUMA or
core sibling mask.

  node_mask(CPU9)    == 0-9
  core_sibling(CPU9) == 0-15

MC mask for CPU9 will thus be 0-9, not a problem.

  node_mask(CPU10)    == 10-19
  core_sibling(CPU10) == 0-15

MC mask for CPU10 will thus be 10-19, not a problem.

  node_mask(CPU16)    == 10-19
  core_sibling(CPU16) == 16-19

MC mask for CPU16 will thus be 16-19... Uh oh. CPUs 16-19 are in two
different unique MC spans, and the scheduler has no idea what to make of
that. That triggers the WARN_ON() added by commit

  ccf74128d66c ("sched/topology: Assert non-NUMA topology masks don't (partially) overlap")

Fixing MPIDR-derived topology
=============================

We could try to come up with some cleverer scheme to figure out which of
the available masks to pick, but really if one of those masks resulted from
MPIDR then it should be discarded because it's bound to be bogus.

I was hoping to give MPIDR a chance for SMT, to figure out which threads are
in the same core using Aff1-3 as core ID, but Sudeep and Robin pointed out
to me that there are systems out there where *all* cores have non-zero
values in their higher affinity fields (e.g. RK3288 has "5" in all of its
cores' MPIDR.Aff1), which would expose a bogus core ID to userspace.

Stop using MPIDR for topology information. When no other source of topology
information is available, mark each CPU as its own core and its NUMA node
as its LLC domain.

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20200829130016.26106-1-valentin.schneider@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/topology.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 0801a0f3c156af..ff1dd1dbfe641d 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -36,21 +36,23 @@ void store_cpu_topology(unsigned int cpuid)
 	if (mpidr & MPIDR_UP_BITMASK)
 		return;
 
-	/* Create cpu topology mapping based on MPIDR. */
-	if (mpidr & MPIDR_MT_BITMASK) {
-		/* Multiprocessor system : Multi-threads per core */
-		cpuid_topo->thread_id  = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-		cpuid_topo->core_id    = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-		cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) |
-					 MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8;
-	} else {
-		/* Multiprocessor system : Single-thread per core */
-		cpuid_topo->thread_id  = -1;
-		cpuid_topo->core_id    = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-		cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) |
-					 MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 |
-					 MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16;
-	}
+	/*
+	 * This would be the place to create cpu topology based on MPIDR.
+	 *
+	 * However, it cannot be trusted to depict the actual topology; some
+	 * pieces of the architecture enforce an artificial cap on Aff0 values
+	 * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an
+	 * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up
+	 * having absolutely no relationship to the actual underlying system
+	 * topology, and cannot be reasonably used as core / package ID.
+	 *
+	 * If the MT bit is set, Aff0 *could* be used to define a thread ID, but
+	 * we still wouldn't be able to obtain a sane core ID. This means we
+	 * need to entirely ignore MPIDR for any topology deduction.
+	 */
+	cpuid_topo->thread_id  = -1;
+	cpuid_topo->core_id    = cpuid;
+	cpuid_topo->package_id = cpu_to_node(cpuid);
 
 	pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n",
 		 cpuid, cpuid_topo->package_id, cpuid_topo->core_id,

From fbd4ab780284aeb66374223fa3ac0cd47611aebe Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Mon, 7 Sep 2020 08:32:25 -0700
Subject: [PATCH 096/262] EDAC, sb_edac: Simplify switch statement

clang static analyzer reports this problem

sb_edac.c:959:2: warning: Undefined or garbage value
  returned to caller
        return type;
        ^~~~~~~~~~~

This is a false positive.

However by initializing the type to DEV_UNKNOWN the 3 case can be
removed from the switch, saving a comparison and jump.

Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20200907153225.7294-1-trix@redhat.com
---
 drivers/edac/sb_edac.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index a6704e73fcce5c..9a0d53f44baaa8 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -939,12 +939,9 @@ static enum dev_type sbridge_get_width(struct sbridge_pvt *pvt, u32 mtr)
 
 static enum dev_type __ibridge_get_width(u32 mtr)
 {
-	enum dev_type type;
+	enum dev_type type = DEV_UNKNOWN;
 
 	switch (mtr) {
-	case 3:
-		type = DEV_UNKNOWN;
-		break;
 	case 2:
 		type = DEV_X16;
 		break;

From 0a4bb5e5507a585532cc413125b921c8546fc39f Mon Sep 17 00:00:00 2001
From: Arvind Sankar <nivedita@alum.mit.edu>
Date: Mon, 7 Sep 2020 17:39:19 -0400
Subject: [PATCH 097/262] x86/fpu: Allow multiple bits in clearcpuid= parameter

Commit

  0c2a3913d6f5 ("x86/fpu: Parse clearcpuid= as early XSAVE argument")

changed clearcpuid parsing from __setup() to cmdline_find_option().
While the __setup() function would have been called for each clearcpuid=
parameter on the command line, cmdline_find_option() will only return
the last one, so the change effectively made it impossible to disable
more than one bit.

Allow a comma-separated list of bit numbers as the argument for
clearcpuid to allow multiple bits to be disabled again. Log the bits
being disabled for informational purposes.

Also fix the check on the return value of cmdline_find_option(). It
returns -1 when the option is not found, so testing as a boolean is
incorrect.

Fixes: 0c2a3913d6f5 ("x86/fpu: Parse clearcpuid= as early XSAVE argument")
Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200907213919.2423441-1-nivedita@alum.mit.edu
---
 .../admin-guide/kernel-parameters.txt         |  2 +-
 arch/x86/kernel/fpu/init.c                    | 30 ++++++++++++++-----
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1068742a6df11..ffe864390c5aca 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -577,7 +577,7 @@
 			loops can be debugged more effectively on production
 			systems.
 
-	clearcpuid=BITNUM [X86]
+	clearcpuid=BITNUM[,BITNUM...] [X86]
 			Disable CPUID feature X for the kernel. See
 			arch/x86/include/asm/cpufeatures.h for the valid bit
 			numbers. Note the Linux specific bits are not necessarily
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 61ddc3a5e5c2b6..f8ff895aaf7e17 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -243,9 +243,9 @@ static void __init fpu__init_system_ctx_switch(void)
  */
 static void __init fpu__init_parse_early_param(void)
 {
-	char arg[32];
+	char arg[128];
 	char *argptr = arg;
-	int bit;
+	int arglen, res, bit;
 
 #ifdef CONFIG_X86_32
 	if (cmdline_find_option_bool(boot_command_line, "no387"))
@@ -268,12 +268,26 @@ static void __init fpu__init_parse_early_param(void)
 	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
 		setup_clear_cpu_cap(X86_FEATURE_XSAVES);
 
-	if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
-				sizeof(arg)) &&
-	    get_option(&argptr, &bit) &&
-	    bit >= 0 &&
-	    bit < NCAPINTS * 32)
-		setup_clear_cpu_cap(bit);
+	arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
+	if (arglen <= 0)
+		return;
+
+	pr_info("Clearing CPUID bits:");
+	do {
+		res = get_option(&argptr, &bit);
+		if (res == 0 || res == 3)
+			break;
+
+		/* If the argument was too long, the last bit may be cut off */
+		if (res == 1 && arglen >= sizeof(arg))
+			break;
+
+		if (bit >= 0 && bit < NCAPINTS * 32) {
+			pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+			setup_clear_cpu_cap(bit);
+		}
+	} while (res == 2);
+	pr_cont("\n");
 }
 
 /*

From e2def7d49d0812ea40a224161b2001b2e815dce2 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sun, 6 Sep 2020 23:02:52 +0200
Subject: [PATCH 098/262] x86/mce: Make mce_rdmsrl() panic on an inaccessible
 MSR

If an exception needs to be handled while reading an MSR - which is in
most of the cases caused by a #GP on a non-existent MSR - then this
is most likely the incarnation of a BIOS or a hardware bug. Such bug
violates the architectural guarantee that MCA banks are present with all
MSRs belonging to them.

The proper fix belongs in the hardware/firmware - not in the kernel.

Handling an #MC exception which is raised while an NMI is being handled
would cause the nasty NMI nesting issue because of the shortcoming of
IRET of reenabling NMIs when executed. And the machine is in an #MC
context already so <Deity> be at its side.

Tracing MSR accesses while in #MC is another no-no due to tracing being
inherently a bad idea in atomic context:

  vmlinux.o: warning: objtool: do_machine_check()+0x4a: call to mce_rdmsrl() leaves .noinstr.text section

so remove all that "additional" functionality from mce_rdmsrl() and
provide it with a special exception handler which panics the machine
when that MSR is not accessible.

The exception handler prints a human-readable message explaining what
the panic reason is but, what is more, it panics while in the #GP
handler and latter won't have executed an IRET, thus opening the NMI
nesting issue in the case when the #MC has happened while handling
an NMI. (#MC itself won't be reenabled until MCG_STATUS hasn't been
cleared).

Suggested-by: Andy Lutomirski <luto@kernel.org>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
[ Add missing prototypes for ex_handler_* ]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20200906212130.GA28456@zn.tnic
---
 arch/x86/kernel/cpu/mce/core.c     | 72 +++++++++++++++++++++++++-----
 arch/x86/kernel/cpu/mce/internal.h | 10 +++++
 2 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 0ba24dfffdb278..a697baee9aa0b2 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -373,10 +373,28 @@ static int msr_to_offset(u32 msr)
 	return -1;
 }
 
+__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+				      struct pt_regs *regs, int trapnr,
+				      unsigned long error_code,
+				      unsigned long fault_addr)
+{
+	pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
+		 (unsigned int)regs->cx, regs->ip, (void *)regs->ip);
+
+	show_stack_regs(regs);
+
+	panic("MCA architectural violation!\n");
+
+	while (true)
+		cpu_relax();
+
+	return true;
+}
+
 /* MSR access wrappers used for error injection */
 static u64 mce_rdmsrl(u32 msr)
 {
-	u64 v;
+	DECLARE_ARGS(val, low, high);
 
 	if (__this_cpu_read(injectm.finished)) {
 		int offset = msr_to_offset(msr);
@@ -386,21 +404,43 @@ static u64 mce_rdmsrl(u32 msr)
 		return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
 	}
 
-	if (rdmsrl_safe(msr, &v)) {
-		WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
-		/*
-		 * Return zero in case the access faulted. This should
-		 * not happen normally but can happen if the CPU does
-		 * something weird, or if the code is buggy.
-		 */
-		v = 0;
-	}
+	/*
+	 * RDMSR on MCA MSRs should not fault. If they do, this is very much an
+	 * architectural violation and needs to be reported to hw vendor. Panic
+	 * the box to not allow any further progress.
+	 */
+	asm volatile("1: rdmsr\n"
+		     "2:\n"
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
+		     : EAX_EDX_RET(val, low, high) : "c" (msr));
 
-	return v;
+
+	return EAX_EDX_VAL(val, low, high);
+}
+
+__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+				      struct pt_regs *regs, int trapnr,
+				      unsigned long error_code,
+				      unsigned long fault_addr)
+{
+	pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
+		 (unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
+		  regs->ip, (void *)regs->ip);
+
+	show_stack_regs(regs);
+
+	panic("MCA architectural violation!\n");
+
+	while (true)
+		cpu_relax();
+
+	return true;
 }
 
 static void mce_wrmsrl(u32 msr, u64 v)
 {
+	u32 low, high;
+
 	if (__this_cpu_read(injectm.finished)) {
 		int offset = msr_to_offset(msr);
 
@@ -408,7 +448,15 @@ static void mce_wrmsrl(u32 msr, u64 v)
 			*(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
 		return;
 	}
-	wrmsrl(msr, v);
+
+	low  = (u32)v;
+	high = (u32)(v >> 32);
+
+	/* See comment in mce_rdmsrl() */
+	asm volatile("1: wrmsr\n"
+		     "2:\n"
+		     _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
+		     : : "c" (msr), "a"(low), "d" (high) : "memory");
 }
 
 /*
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 6473070b5da49b..b122610e9046a5 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -185,4 +185,14 @@ extern bool amd_filter_mce(struct mce *m);
 static inline bool amd_filter_mce(struct mce *m)			{ return false; };
 #endif
 
+__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
+				      struct pt_regs *regs, int trapnr,
+				      unsigned long error_code,
+				      unsigned long fault_addr);
+
+__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
+				      struct pt_regs *regs, int trapnr,
+				      unsigned long error_code,
+				      unsigned long fault_addr);
+
 #endif /* __X86_MCE_INTERNAL_H__ */

From 8687bdc04128b2bd16faaae11db10128ad0da7b8 Mon Sep 17 00:00:00 2001
From: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com>
Date: Tue, 8 Sep 2020 18:57:45 +0800
Subject: [PATCH 099/262] x86/cpu/centaur: Replace two-condition switch-case
 with an if statement

Use a normal if statements instead of a two-condition switch-case.

 [ bp: Massage commit message. ]

Signed-off-by: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/1599562666-31351-2-git-send-email-TonyWWang-oc@zhaoxin.com
---
 arch/x86/kernel/cpu/centaur.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index c5cf336e50776d..5f811586a23cfb 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -90,18 +90,14 @@ enum {
 
 static void early_init_centaur(struct cpuinfo_x86 *c)
 {
-	switch (c->x86) {
 #ifdef CONFIG_X86_32
-	case 5:
-		/* Emulate MTRRs using Centaur's MCR. */
+	/* Emulate MTRRs using Centaur's MCR. */
+	if (c->x86 == 5)
 		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
-		break;
 #endif
-	case 6:
-		if (c->x86_model >= 0xf)
-			set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-		break;
-	}
+	if (c->x86 == 6 && c->x86_model >= 0xf)
+		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 #endif
@@ -145,9 +141,8 @@ static void init_centaur(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 	}
 
-	switch (c->x86) {
 #ifdef CONFIG_X86_32
-	case 5:
+	if (c->x86 == 5) {
 		switch (c->x86_model) {
 		case 4:
 			name = "C6";
@@ -207,12 +202,10 @@ static void init_centaur(struct cpuinfo_x86 *c)
 			c->x86_cache_size = (cc>>24)+(dd>>24);
 		}
 		sprintf(c->x86_model_id, "WinChip %s", name);
-		break;
+	}
 #endif
-	case 6:
+	if (c->x86 == 6)
 		init_c3(c);
-		break;
-	}
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 #endif

From 33b4711df4c1b3aec7c267c60fc24abccfadd40c Mon Sep 17 00:00:00 2001
From: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com>
Date: Tue, 8 Sep 2020 18:57:46 +0800
Subject: [PATCH 100/262] x86/cpu/centaur: Add Centaur family >=7 CPUs
 initialization support

Add Centaur family >=7 CPUs specific initialization support.

Signed-off-by: Tony W Wang-oc <TonyWWang-oc@zhaoxin.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/1599562666-31351-3-git-send-email-TonyWWang-oc@zhaoxin.com
---
 arch/x86/kernel/cpu/centaur.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 5f811586a23cfb..345f7d905db677 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -65,6 +65,9 @@ static void init_c3(struct cpuinfo_x86 *c)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 	}
+
+	if (c->x86 >= 7)
+		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 }
 
 enum {
@@ -95,7 +98,8 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
 	if (c->x86 == 5)
 		set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
 #endif
-	if (c->x86 == 6 && c->x86_model >= 0xf)
+	if ((c->x86 == 6 && c->x86_model >= 0xf) ||
+	    (c->x86 >= 7))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
 #ifdef CONFIG_X86_64
@@ -204,7 +208,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
 		sprintf(c->x86_model_id, "WinChip %s", name);
 	}
 #endif
-	if (c->x86 == 6)
+	if (c->x86 == 6 || c->x86 >= 7)
 		init_c3(c);
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);

From b65399f6111b03df870d8daa75b8d140c0de93f4 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Wed, 9 Sep 2020 10:23:02 +0530
Subject: [PATCH 101/262] arm64/mm: Change THP helpers to comply with generic
 MM semantics

pmd_present() and pmd_trans_huge() are expected to behave in the following
manner during various phases of a given PMD. It is derived from a previous
detailed discussion on this topic [1] and present THP documentation [2].

pmd_present(pmd):

- Returns true if pmd refers to system RAM with a valid pmd_page(pmd)
- Returns false if pmd refers to a migration or swap entry

pmd_trans_huge(pmd):

- Returns true if pmd refers to system RAM and is a trans huge mapping

-------------------------------------------------------------------------
|	PMD states	|	pmd_present	|	pmd_trans_huge	|
-------------------------------------------------------------------------
|	Mapped		|	Yes		|	Yes		|
-------------------------------------------------------------------------
|	Splitting	|	Yes		|	Yes		|
-------------------------------------------------------------------------
|	Migration/Swap	|	No		|	No		|
-------------------------------------------------------------------------

The problem:

PMD is first invalidated with pmdp_invalidate() before it's splitting. This
invalidation clears PMD_SECT_VALID as below.

PMD Split -> pmdp_invalidate() -> pmd_mkinvalid -> Clears PMD_SECT_VALID

Once PMD_SECT_VALID gets cleared, it results in pmd_present() return false
on the PMD entry. It will need another bit apart from PMD_SECT_VALID to re-
affirm pmd_present() as true during the THP split process. To comply with
above mentioned semantics, pmd_trans_huge() should also check pmd_present()
first before testing presence of an actual transparent huge mapping.

The solution:

Ideally PMD_TYPE_SECT should have been used here instead. But it shares the
bit position with PMD_SECT_VALID which is used for THP invalidation. Hence
it will not be there for pmd_present() check after pmdp_invalidate().

A new software defined PMD_PRESENT_INVALID (bit 59) can be set on the PMD
entry during invalidation which can help pmd_present() return true and in
recognizing the fact that it still points to memory.

This bit is transient. During the split process it will be overridden by a
page table page representing normal pages in place of erstwhile huge page.
Other pmdp_invalidate() callers always write a fresh PMD value on the entry
overriding this transient PMD_PRESENT_INVALID bit, which makes it safe.

[1]: https://lkml.org/lkml/2018/10/17/231
[2]: https://www.kernel.org/doc/Documentation/vm/transhuge.txt

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Link: https://lore.kernel.org/r/1599627183-14453-2-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pgtable-prot.h |  7 ++++++
 arch/arm64/include/asm/pgtable.h      | 34 ++++++++++++++++++++++++---
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 4d867c6446c484..2df4b75fce3c45 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -19,6 +19,13 @@
 #define PTE_DEVMAP		(_AT(pteval_t, 1) << 57)
 #define PTE_PROT_NONE		(_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */
 
+/*
+ * This bit indicates that the entry is present i.e. pmd_page()
+ * still points to a valid huge page in memory even if the pmd
+ * has been invalidated.
+ */
+#define PMD_PRESENT_INVALID	(_AT(pteval_t, 1) << 59) /* only when !PMD_SECT_VALID */
+
 #ifndef __ASSEMBLY__
 
 #include <asm/cpufeature.h>
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d5d3fbe739534f..d8258ae8fce09c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -145,6 +145,18 @@ static inline pte_t set_pte_bit(pte_t pte, pgprot_t prot)
 	return pte;
 }
 
+static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
+{
+	pmd_val(pmd) &= ~pgprot_val(prot);
+	return pmd;
+}
+
+static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
+{
+	pmd_val(pmd) |= pgprot_val(prot);
+	return pmd;
+}
+
 static inline pte_t pte_wrprotect(pte_t pte)
 {
 	pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
@@ -363,15 +375,24 @@ static inline int pmd_protnone(pmd_t pmd)
 }
 #endif
 
+#define pmd_present_invalid(pmd)     (!!(pmd_val(pmd) & PMD_PRESENT_INVALID))
+
+static inline int pmd_present(pmd_t pmd)
+{
+	return pte_present(pmd_pte(pmd)) || pmd_present_invalid(pmd);
+}
+
 /*
  * THP definitions.
  */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) && pmd_present(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-#define pmd_present(pmd)	pte_present(pmd_pte(pmd))
 #define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_valid(pmd)		pte_valid(pmd_pte(pmd))
@@ -381,7 +402,14 @@ static inline int pmd_protnone(pmd_t pmd)
 #define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mkinvalid(pmd)	(__pmd(pmd_val(pmd) & ~PMD_SECT_VALID))
+
+static inline pmd_t pmd_mkinvalid(pmd_t pmd)
+{
+	pmd = set_pmd_bit(pmd, __pgprot(PMD_PRESENT_INVALID));
+	pmd = clear_pmd_bit(pmd, __pgprot(PMD_SECT_VALID));
+
+	return pmd;
+}
 
 #define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
 

From 53fa117bb33c9ebc4c0746b3830e273f5e9b97b7 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Wed, 9 Sep 2020 10:23:03 +0530
Subject: [PATCH 102/262] arm64/mm: Enable THP migration

In certain page migration situations, a THP page can be migrated without
being split into it's constituent subpages. This saves time required to
split a THP and put it back together when required. But it also saves an
wider address range translation covered by a single TLB entry, reducing
future page fault costs.

A previous patch changed platform THP helpers per generic memory semantics,
clearing the path for THP migration support. This adds two more THP helpers
required to create PMD migration swap entries. Now enable THP migration via
ARCH_ENABLE_THP_MIGRATION.

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Link: https://lore.kernel.org/r/1599627183-14453-3-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig               | 4 ++++
 arch/arm64/include/asm/pgtable.h | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6d232837cbeee8..e21b94061780b1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1876,6 +1876,10 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION
 	def_bool y
 	depends on HUGETLB_PAGE && MIGRATION
 
+config ARCH_ENABLE_THP_MIGRATION
+	def_bool y
+	depends on TRANSPARENT_HUGEPAGE
+
 menu "Power management options"
 
 source "kernel/power/Kconfig"
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d8258ae8fce09c..bc68da9f5706e4 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -875,6 +875,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(swp)	((pte_t) { (swp).val })
 
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define __pmd_to_swp_entry(pmd)		((swp_entry_t) { pmd_val(pmd) })
+#define __swp_entry_to_pmd(swp)		__pmd((swp).val)
+#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
+
 /*
  * Ensure that there are not more swap files than can be encoded in the kernel
  * PTEs.

From 4e56de82d4ec9d98ffdc9e0387d8fdecbf496226 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Wed, 9 Sep 2020 11:18:55 +0530
Subject: [PATCH 103/262] arm64/cpuinfo: Define HWCAP name arrays per their
 actual bit definitions

HWCAP name arrays (hwcap_str, compat_hwcap_str, compat_hwcap2_str) that are
scanned for /proc/cpuinfo are detached from their bit definitions making it
vulnerable and difficult to correlate. It is also bit problematic because
during /proc/cpuinfo dump these arrays get traversed sequentially assuming
they reflect and match actual HWCAP bit sequence, to test various features
for a given CPU. This redefines name arrays per their HWCAP bit definitions
. It also warns after detecting any feature which is not expected on arm64.

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Dave Martin <Dave.Martin@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Link: https://lore.kernel.org/r/1599630535-29337-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/hwcap.h |   9 ++
 arch/arm64/kernel/cpuinfo.c    | 177 +++++++++++++++++----------------
 2 files changed, 101 insertions(+), 85 deletions(-)

diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 22f73fe0903099..6493a4c63a2ff6 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -8,18 +8,27 @@
 #include <uapi/asm/hwcap.h>
 #include <asm/cpufeature.h>
 
+#define COMPAT_HWCAP_SWP	(1 << 0)
 #define COMPAT_HWCAP_HALF	(1 << 1)
 #define COMPAT_HWCAP_THUMB	(1 << 2)
+#define COMPAT_HWCAP_26BIT	(1 << 3)
 #define COMPAT_HWCAP_FAST_MULT	(1 << 4)
+#define COMPAT_HWCAP_FPA	(1 << 5)
 #define COMPAT_HWCAP_VFP	(1 << 6)
 #define COMPAT_HWCAP_EDSP	(1 << 7)
+#define COMPAT_HWCAP_JAVA	(1 << 8)
+#define COMPAT_HWCAP_IWMMXT	(1 << 9)
+#define COMPAT_HWCAP_CRUNCH	(1 << 10)
+#define COMPAT_HWCAP_THUMBEE	(1 << 11)
 #define COMPAT_HWCAP_NEON	(1 << 12)
 #define COMPAT_HWCAP_VFPv3	(1 << 13)
+#define COMPAT_HWCAP_VFPV3D16	(1 << 14)
 #define COMPAT_HWCAP_TLS	(1 << 15)
 #define COMPAT_HWCAP_VFPv4	(1 << 16)
 #define COMPAT_HWCAP_IDIVA	(1 << 17)
 #define COMPAT_HWCAP_IDIVT	(1 << 18)
 #define COMPAT_HWCAP_IDIV	(COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT)
+#define COMPAT_HWCAP_VFPD32	(1 << 19)
 #define COMPAT_HWCAP_LPAE	(1 << 20)
 #define COMPAT_HWCAP_EVTSTRM	(1 << 21)
 
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index d0076c2159e667..25113245825c0b 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -43,94 +43,92 @@ static const char *icache_policy_str[] = {
 unsigned long __icache_flags;
 
 static const char *const hwcap_str[] = {
-	"fp",
-	"asimd",
-	"evtstrm",
-	"aes",
-	"pmull",
-	"sha1",
-	"sha2",
-	"crc32",
-	"atomics",
-	"fphp",
-	"asimdhp",
-	"cpuid",
-	"asimdrdm",
-	"jscvt",
-	"fcma",
-	"lrcpc",
-	"dcpop",
-	"sha3",
-	"sm3",
-	"sm4",
-	"asimddp",
-	"sha512",
-	"sve",
-	"asimdfhm",
-	"dit",
-	"uscat",
-	"ilrcpc",
-	"flagm",
-	"ssbs",
-	"sb",
-	"paca",
-	"pacg",
-	"dcpodp",
-	"sve2",
-	"sveaes",
-	"svepmull",
-	"svebitperm",
-	"svesha3",
-	"svesm4",
-	"flagm2",
-	"frint",
-	"svei8mm",
-	"svef32mm",
-	"svef64mm",
-	"svebf16",
-	"i8mm",
-	"bf16",
-	"dgh",
-	"rng",
-	"bti",
-	/* reserved for "mte" */
-	NULL
+	[KERNEL_HWCAP_FP]		= "fp",
+	[KERNEL_HWCAP_ASIMD]		= "asimd",
+	[KERNEL_HWCAP_EVTSTRM]		= "evtstrm",
+	[KERNEL_HWCAP_AES]		= "aes",
+	[KERNEL_HWCAP_PMULL]		= "pmull",
+	[KERNEL_HWCAP_SHA1]		= "sha1",
+	[KERNEL_HWCAP_SHA2]		= "sha2",
+	[KERNEL_HWCAP_CRC32]		= "crc32",
+	[KERNEL_HWCAP_ATOMICS]		= "atomics",
+	[KERNEL_HWCAP_FPHP]		= "fphp",
+	[KERNEL_HWCAP_ASIMDHP]		= "asimdhp",
+	[KERNEL_HWCAP_CPUID]		= "cpuid",
+	[KERNEL_HWCAP_ASIMDRDM]		= "asimdrdm",
+	[KERNEL_HWCAP_JSCVT]		= "jscvt",
+	[KERNEL_HWCAP_FCMA]		= "fcma",
+	[KERNEL_HWCAP_LRCPC]		= "lrcpc",
+	[KERNEL_HWCAP_DCPOP]		= "dcpop",
+	[KERNEL_HWCAP_SHA3]		= "sha3",
+	[KERNEL_HWCAP_SM3]		= "sm3",
+	[KERNEL_HWCAP_SM4]		= "sm4",
+	[KERNEL_HWCAP_ASIMDDP]		= "asimddp",
+	[KERNEL_HWCAP_SHA512]		= "sha512",
+	[KERNEL_HWCAP_SVE]		= "sve",
+	[KERNEL_HWCAP_ASIMDFHM]		= "asimdfhm",
+	[KERNEL_HWCAP_DIT]		= "dit",
+	[KERNEL_HWCAP_USCAT]		= "uscat",
+	[KERNEL_HWCAP_ILRCPC]		= "ilrcpc",
+	[KERNEL_HWCAP_FLAGM]		= "flagm",
+	[KERNEL_HWCAP_SSBS]		= "ssbs",
+	[KERNEL_HWCAP_SB]		= "sb",
+	[KERNEL_HWCAP_PACA]		= "paca",
+	[KERNEL_HWCAP_PACG]		= "pacg",
+	[KERNEL_HWCAP_DCPODP]		= "dcpodp",
+	[KERNEL_HWCAP_SVE2]		= "sve2",
+	[KERNEL_HWCAP_SVEAES]		= "sveaes",
+	[KERNEL_HWCAP_SVEPMULL]		= "svepmull",
+	[KERNEL_HWCAP_SVEBITPERM]	= "svebitperm",
+	[KERNEL_HWCAP_SVESHA3]		= "svesha3",
+	[KERNEL_HWCAP_SVESM4]		= "svesm4",
+	[KERNEL_HWCAP_FLAGM2]		= "flagm2",
+	[KERNEL_HWCAP_FRINT]		= "frint",
+	[KERNEL_HWCAP_SVEI8MM]		= "svei8mm",
+	[KERNEL_HWCAP_SVEF32MM]		= "svef32mm",
+	[KERNEL_HWCAP_SVEF64MM]		= "svef64mm",
+	[KERNEL_HWCAP_SVEBF16]		= "svebf16",
+	[KERNEL_HWCAP_I8MM]		= "i8mm",
+	[KERNEL_HWCAP_BF16]		= "bf16",
+	[KERNEL_HWCAP_DGH]		= "dgh",
+	[KERNEL_HWCAP_RNG]		= "rng",
+	[KERNEL_HWCAP_BTI]		= "bti",
 };
 
 #ifdef CONFIG_COMPAT
+#define COMPAT_KERNEL_HWCAP(x)	const_ilog2(COMPAT_HWCAP_ ## x)
 static const char *const compat_hwcap_str[] = {
-	"swp",
-	"half",
-	"thumb",
-	"26bit",
-	"fastmult",
-	"fpa",
-	"vfp",
-	"edsp",
-	"java",
-	"iwmmxt",
-	"crunch",
-	"thumbee",
-	"neon",
-	"vfpv3",
-	"vfpv3d16",
-	"tls",
-	"vfpv4",
-	"idiva",
-	"idivt",
-	"vfpd32",
-	"lpae",
-	"evtstrm",
-	NULL
+	[COMPAT_KERNEL_HWCAP(SWP)]	= "swp",
+	[COMPAT_KERNEL_HWCAP(HALF)]	= "half",
+	[COMPAT_KERNEL_HWCAP(THUMB)]	= "thumb",
+	[COMPAT_KERNEL_HWCAP(26BIT)]	= NULL,	/* Not possible on arm64 */
+	[COMPAT_KERNEL_HWCAP(FAST_MULT)] = "fastmult",
+	[COMPAT_KERNEL_HWCAP(FPA)]	= NULL,	/* Not possible on arm64 */
+	[COMPAT_KERNEL_HWCAP(VFP)]	= "vfp",
+	[COMPAT_KERNEL_HWCAP(EDSP)]	= "edsp",
+	[COMPAT_KERNEL_HWCAP(JAVA)]	= NULL,	/* Not possible on arm64 */
+	[COMPAT_KERNEL_HWCAP(IWMMXT)]	= NULL,	/* Not possible on arm64 */
+	[COMPAT_KERNEL_HWCAP(CRUNCH)]	= NULL,	/* Not possible on arm64 */
+	[COMPAT_KERNEL_HWCAP(THUMBEE)]	= NULL,	/* Not possible on arm64 */
+	[COMPAT_KERNEL_HWCAP(NEON)]	= "neon",
+	[COMPAT_KERNEL_HWCAP(VFPv3)]	= "vfpv3",
+	[COMPAT_KERNEL_HWCAP(VFPV3D16)]	= NULL,	/* Not possible on arm64 */
+	[COMPAT_KERNEL_HWCAP(TLS)]	= "tls",
+	[COMPAT_KERNEL_HWCAP(VFPv4)]	= "vfpv4",
+	[COMPAT_KERNEL_HWCAP(IDIVA)]	= "idiva",
+	[COMPAT_KERNEL_HWCAP(IDIVT)]	= "idivt",
+	[COMPAT_KERNEL_HWCAP(VFPD32)]	= NULL,	/* Not possible on arm64 */
+	[COMPAT_KERNEL_HWCAP(LPAE)]	= "lpae",
+	[COMPAT_KERNEL_HWCAP(EVTSTRM)]	= "evtstrm",
 };
 
+#define COMPAT_KERNEL_HWCAP2(x)	const_ilog2(COMPAT_HWCAP2_ ## x)
 static const char *const compat_hwcap2_str[] = {
-	"aes",
-	"pmull",
-	"sha1",
-	"sha2",
-	"crc32",
-	NULL
+	[COMPAT_KERNEL_HWCAP2(AES)]	= "aes",
+	[COMPAT_KERNEL_HWCAP2(PMULL)]	= "pmull",
+	[COMPAT_KERNEL_HWCAP2(SHA1)]	= "sha1",
+	[COMPAT_KERNEL_HWCAP2(SHA2)]	= "sha2",
+	[COMPAT_KERNEL_HWCAP2(CRC32)]	= "crc32",
 };
 #endif /* CONFIG_COMPAT */
 
@@ -166,16 +164,25 @@ static int c_show(struct seq_file *m, void *v)
 		seq_puts(m, "Features\t:");
 		if (compat) {
 #ifdef CONFIG_COMPAT
-			for (j = 0; compat_hwcap_str[j]; j++)
-				if (compat_elf_hwcap & (1 << j))
+			for (j = 0; j < ARRAY_SIZE(compat_hwcap_str); j++) {
+				if (compat_elf_hwcap & (1 << j)) {
+					/*
+					 * Warn once if any feature should not
+					 * have been present on arm64 platform.
+					 */
+					if (WARN_ON_ONCE(!compat_hwcap_str[j]))
+						continue;
+
 					seq_printf(m, " %s", compat_hwcap_str[j]);
+				}
+			}
 
-			for (j = 0; compat_hwcap2_str[j]; j++)
+			for (j = 0; j < ARRAY_SIZE(compat_hwcap2_str); j++)
 				if (compat_elf_hwcap2 & (1 << j))
 					seq_printf(m, " %s", compat_hwcap2_str[j]);
 #endif /* CONFIG_COMPAT */
 		} else {
-			for (j = 0; hwcap_str[j]; j++)
+			for (j = 0; j < ARRAY_SIZE(hwcap_str); j++)
 				if (cpu_have_feature(j))
 					seq_printf(m, " %s", hwcap_str[j]);
 		}

From 11e339d53a739e4cf885c1e2a843a4004204d271 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 10 Sep 2020 19:59:34 +1000
Subject: [PATCH 104/262] arm64/mm: Remove CONT_RANGE_OFFSET

The macro was introduced by commit <ecf35a237a85> ("arm64: PTE/PMD
contiguous bit definition") at the beginning. It's only used by
commit <348a65cdcbbf> ("arm64: Mark kernel page ranges contiguous"),
which was reverted later by commit <667c27597ca8>. This makes the
macro unused.

This removes the unused macro (CONT_RANGE_OFFSET).

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20200910095936.20307-1-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pgtable-hwdef.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index d400a4d9aee249..8a399e6668370e 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -98,8 +98,6 @@
 #define CONT_PMDS		(1 << (CONT_PMD_SHIFT - PMD_SHIFT))
 #define CONT_PMD_SIZE		(CONT_PMDS * PMD_SIZE)
 #define CONT_PMD_MASK		(~(CONT_PMD_SIZE - 1))
-/* the numerical offset of the PTE within a range of CONT_PTES */
-#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
 
 /*
  * Hardware page table definitions.

From c0d6de327f18d39ef759aa883bffc3c32bc69015 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 10 Sep 2020 19:59:35 +1000
Subject: [PATCH 105/262] arm64/mm: Unify CONT_PTE_SHIFT

CONT_PTE_SHIFT actually depends on CONFIG_ARM64_CONT_SHIFT. It's
reasonable to reflect the dependency:

   * This renames CONFIG_ARM64_CONT_SHIFT to CONFIG_ARM64_CONT_PTE_SHIFT,
     so that we can introduce CONFIG_ARM64_CONT_PMD_SHIFT later.

   * CONT_{SHIFT, SIZE, MASK}, defined in page-def.h are removed as they
     are not used by anyone.

   * CONT_PTE_SHIFT is determined by CONFIG_ARM64_CONT_PTE_SHIFT.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20200910095936.20307-2-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig                     | 2 +-
 arch/arm64/include/asm/page-def.h      | 5 -----
 arch/arm64/include/asm/pgtable-hwdef.h | 4 +---
 3 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e21b94061780b1..4f3b7b24b5e572 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -211,7 +211,7 @@ config ARM64_PAGE_SHIFT
 	default 14 if ARM64_16K_PAGES
 	default 12
 
-config ARM64_CONT_SHIFT
+config ARM64_CONT_PTE_SHIFT
 	int
 	default 5 if ARM64_64K_PAGES
 	default 7 if ARM64_16K_PAGES
diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h
index f99d48ecbeefe2..2403f7b4cdbfb6 100644
--- a/arch/arm64/include/asm/page-def.h
+++ b/arch/arm64/include/asm/page-def.h
@@ -11,13 +11,8 @@
 #include <linux/const.h>
 
 /* PAGE_SHIFT determines the page size */
-/* CONT_SHIFT determines the number of pages which can be tracked together  */
 #define PAGE_SHIFT		CONFIG_ARM64_PAGE_SHIFT
-#define CONT_SHIFT		CONFIG_ARM64_CONT_SHIFT
 #define PAGE_SIZE		(_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE-1))
 
-#define CONT_SIZE		(_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
-#define CONT_MASK		(~(CONT_SIZE-1))
-
 #endif /* __ASM_PAGE_DEF_H */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 8a399e6668370e..6c9c67f6255184 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -81,14 +81,12 @@
 /*
  * Contiguous page definitions.
  */
+#define CONT_PTE_SHIFT		(CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT)
 #ifdef CONFIG_ARM64_64K_PAGES
-#define CONT_PTE_SHIFT		(5 + PAGE_SHIFT)
 #define CONT_PMD_SHIFT		(5 + PMD_SHIFT)
 #elif defined(CONFIG_ARM64_16K_PAGES)
-#define CONT_PTE_SHIFT		(7 + PAGE_SHIFT)
 #define CONT_PMD_SHIFT		(5 + PMD_SHIFT)
 #else
-#define CONT_PTE_SHIFT		(4 + PAGE_SHIFT)
 #define CONT_PMD_SHIFT		(4 + PMD_SHIFT)
 #endif
 

From e676594115f0bcc12d4791f9ee919e20d8d750ee Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 10 Sep 2020 19:59:36 +1000
Subject: [PATCH 106/262] arm64/mm: Unify CONT_PMD_SHIFT

Similar to how CONT_PTE_SHIFT is determined, this introduces a new
kernel option (CONFIG_CONT_PMD_SHIFT) to determine CONT_PMD_SHIFT.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20200910095936.20307-3-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig                     |  6 ++++++
 arch/arm64/include/asm/pgtable-hwdef.h | 10 ++--------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4f3b7b24b5e572..609629e3677918 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -217,6 +217,12 @@ config ARM64_CONT_PTE_SHIFT
 	default 7 if ARM64_16K_PAGES
 	default 4
 
+config ARM64_CONT_PMD_SHIFT
+	int
+	default 5 if ARM64_64K_PAGES
+	default 5 if ARM64_16K_PAGES
+	default 4
+
 config ARCH_MMAP_RND_BITS_MIN
        default 14 if ARM64_64K_PAGES
        default 16 if ARM64_16K_PAGES
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 6c9c67f6255184..94b3f2ac2e9dba 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -82,17 +82,11 @@
  * Contiguous page definitions.
  */
 #define CONT_PTE_SHIFT		(CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT)
-#ifdef CONFIG_ARM64_64K_PAGES
-#define CONT_PMD_SHIFT		(5 + PMD_SHIFT)
-#elif defined(CONFIG_ARM64_16K_PAGES)
-#define CONT_PMD_SHIFT		(5 + PMD_SHIFT)
-#else
-#define CONT_PMD_SHIFT		(4 + PMD_SHIFT)
-#endif
-
 #define CONT_PTES		(1 << (CONT_PTE_SHIFT - PAGE_SHIFT))
 #define CONT_PTE_SIZE		(CONT_PTES * PAGE_SIZE)
 #define CONT_PTE_MASK		(~(CONT_PTE_SIZE - 1))
+
+#define CONT_PMD_SHIFT		(CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT)
 #define CONT_PMDS		(1 << (CONT_PMD_SHIFT - PMD_SHIFT))
 #define CONT_PMD_SIZE		(CONT_PMDS * PMD_SIZE)
 #define CONT_PMD_MASK		(~(CONT_PMD_SIZE - 1))

From 2a493132146152c21587414d96ba9026e43acc3d Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Wed, 9 Sep 2020 17:28:02 +0800
Subject: [PATCH 107/262] arm64: Remove the unused include statements

linux/arm-smccc.h is included more than once, Remove the one that isn't
necessary.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Link: https://lore.kernel.org/r/1599643682-10404-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpu_errata.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index c332d49780dc96..9df3f9bb795026 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -169,8 +169,6 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
 }
 #endif	/* CONFIG_KVM_INDIRECT_VECTORS */
 
-#include <linux/arm-smccc.h>
-
 static void __maybe_unused call_smc_arch_workaround_1(void)
 {
 	arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);

From 72789a4a6a914601912a8992b0d43bd5abc05128 Mon Sep 17 00:00:00 2001
From: Pingfan Liu <kernelfans@gmail.com>
Date: Fri, 28 Aug 2020 21:39:57 +0800
Subject: [PATCH 108/262] arm64/relocate_kernel: remove redundant code

Kernel startup entry point requires disabling MMU and D-cache.

As for kexec-reboot, taking a close look at "msr sctlr_el1, x12" in
__cpu_soft_restart as the following:

-1. booted at EL1
The instruction is enough to disable MMU and I/D cache for
EL1 regime.

-2. booted at EL2, using VHE
Access to SCTLR_EL1 is redirected to SCTLR_EL2 in EL2.  So the instruction
is enough to disable MMU and clear I+C bits for EL2 regime.

-3. booted at EL2, not using VHE
The instruction itself can not affect EL2 regime. But The hyp-stub doesn't
enable the MMU and I/D cache for EL2 regime. And KVM also disable them for EL2
regime when its unloaded, or execute a HVC_SOFT_RESTART call. So when
kexec-reboot, the code in KVM has prepare the requirement.

As a conclusion, disabling MMU and clearing I+C bits in
SYM_CODE_START(arm64_relocate_new_kernel) is redundant, and can be removed

Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Cc: James Morse <james.morse@arm.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Remi Denis-Courmont <remi.denis.courmont@huawei.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: kvmarm@lists.cs.columbia.edu
Link: https://lore.kernel.org/r/1598621998-20563-1-git-send-email-kernelfans@gmail.com
To: linux-arm-kernel@lists.infradead.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpu-reset.S       |  4 ++++
 arch/arm64/kernel/relocate_kernel.S | 12 ------------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 4a18055b2ff9d1..37721eb6f9a145 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -35,6 +35,10 @@ SYM_CODE_START(__cpu_soft_restart)
 	mov_q	x13, SCTLR_ELx_FLAGS
 	bic	x12, x12, x13
 	pre_disable_mmu_workaround
+	/*
+	 * either disable EL1&0 translation regime or disable EL2&0 translation
+	 * regime if HCR_EL2.E2H == 1
+	 */
 	msr	sctlr_el1, x12
 	isb
 
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index 542d6edc6806ac..84eec95ec06cc5 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -36,18 +36,6 @@ SYM_CODE_START(arm64_relocate_new_kernel)
 	mov	x14, xzr			/* x14 = entry ptr */
 	mov	x13, xzr			/* x13 = copy dest */
 
-	/* Clear the sctlr_el2 flags. */
-	mrs	x0, CurrentEL
-	cmp	x0, #CurrentEL_EL2
-	b.ne	1f
-	mrs	x0, sctlr_el2
-	mov_q	x1, SCTLR_ELx_FLAGS
-	bic	x0, x0, x1
-	pre_disable_mmu_workaround
-	msr	sctlr_el2, x0
-	isb
-1:
-
 	/* Check if the new image needs relocation. */
 	tbnz	x16, IND_DONE_BIT, .Ldone
 

From 3a1793066fdfbbd657d9591cc03d6e1b6634a587 Mon Sep 17 00:00:00 2001
From: Pingfan Liu <kernelfans@gmail.com>
Date: Fri, 28 Aug 2020 21:39:58 +0800
Subject: [PATCH 109/262] Documentation/kvm/arm: improve description of
 HVC_SOFT_RESTART

Besides disabling MMU, HVC_SOFT_RESTART also clears I+D bits. These behaviors
are what kexec-reboot expects, so describe it more precisely.

Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Cc: James Morse <james.morse@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Julien Thierry <julien.thierry.kdev@gmail.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: linux-doc@vger.kernel.org
Cc: kvmarm@lists.cs.columbia.edu
Link: https://lore.kernel.org/r/1598621998-20563-2-git-send-email-kernelfans@gmail.com
To: linux-arm-kernel@lists.infradead.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/virt/kvm/arm/hyp-abi.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/virt/kvm/arm/hyp-abi.rst b/Documentation/virt/kvm/arm/hyp-abi.rst
index d9eba93aa36426..83cadd8186faf1 100644
--- a/Documentation/virt/kvm/arm/hyp-abi.rst
+++ b/Documentation/virt/kvm/arm/hyp-abi.rst
@@ -54,9 +54,9 @@ these functions (see arch/arm{,64}/include/asm/virt.h):
     x3 = x1's value when entering the next payload (arm64)
     x4 = x2's value when entering the next payload (arm64)
 
-  Mask all exceptions, disable the MMU, move the arguments into place
-  (arm64 only), and jump to the restart address while at HYP/EL2. This
-  hypercall is not expected to return to its caller.
+  Mask all exceptions, disable the MMU, clear I+D bits, move the arguments
+  into place (arm64 only), and jump to the restart address while at HYP/EL2.
+  This hypercall is not expected to return to its caller.
 
 Any other value of r0/x0 triggers a hypervisor-specific handling,
 which is not documented here.

From 93396936ed0ce2c6f44140bd14728611d0bb065e Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Mon, 14 Sep 2020 14:06:51 +0530
Subject: [PATCH 110/262] arm64: kprobe: add checks for ARMv8.3-PAuth combined
 instructions

Currently the ARMv8.3-PAuth combined branch instructions (braa, retaa
etc.) are not simulated for out-of-line execution with a handler. Hence the
uprobe of such instructions leads to kernel warnings in a loop as they are
not explicitly checked and fall into INSN_GOOD categories. Other combined
instructions like LDRAA and LDRBB can be probed.

The issue of the combined branch instructions is fixed by adding
group definitions of all such instructions and rejecting their probes.
The instruction groups added are br_auth(braa, brab, braaz and brabz),
blr_auth(blraa, blrab, blraaz and blrabz), ret_auth(retaa and retab) and
eret_auth(eretaa and eretab).

Warning log:
 WARNING: CPU: 0 PID: 156 at arch/arm64/kernel/probes/uprobes.c:182 uprobe_single_step_handler+0x34/0x50
 Modules linked in:
 CPU: 0 PID: 156 Comm: func Not tainted 5.9.0-rc3 #188
 Hardware name: Foundation-v8A (DT)
 pstate: 804003c9 (Nzcv DAIF +PAN -UAO BTYPE=--)
 pc : uprobe_single_step_handler+0x34/0x50
 lr : single_step_handler+0x70/0xf8
 sp : ffff800012af3e30
 x29: ffff800012af3e30 x28: ffff000878723b00
 x27: 0000000000000000 x26: 0000000000000000
 x25: 0000000000000000 x24: 0000000000000000
 x23: 0000000060001000 x22: 00000000cb000022
 x21: ffff800012065ce8 x20: ffff800012af3ec0
 x19: ffff800012068d50 x18: 0000000000000000
 x17: 0000000000000000 x16: 0000000000000000
 x15: 0000000000000000 x14: 0000000000000000
 x13: 0000000000000000 x12: 0000000000000000
 x11: 0000000000000000 x10: 0000000000000000
 x9 : ffff800010085c90 x8 : 0000000000000000
 x7 : 0000000000000000 x6 : ffff80001205a9c8
 x5 : ffff80001205a000 x4 : ffff80001233db80
 x3 : ffff8000100a7a60 x2 : 0020000000000003
 x1 : 0000fffffffff008 x0 : ffff800012af3ec0
 Call trace:
  uprobe_single_step_handler+0x34/0x50
  single_step_handler+0x70/0xf8
  do_debug_exception+0xb8/0x130
  el0_sync_handler+0x138/0x1b8
  el0_sync+0x158/0x180

Fixes: 74afda4016a7 ("arm64: compile the kernel with ptrauth return address signing")
Fixes: 04ca3204fa09 ("arm64: enable pointer authentication")
Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20200914083656.21428-2-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/insn.h          | 4 ++++
 arch/arm64/kernel/insn.c               | 5 ++++-
 arch/arm64/kernel/probes/decode-insn.c | 3 ++-
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 0bc46149e49175..4b39293d0f72dd 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -359,9 +359,13 @@ __AARCH64_INSN_FUNCS(brk,	0xFFE0001F, 0xD4200000)
 __AARCH64_INSN_FUNCS(exception,	0xFF000000, 0xD4000000)
 __AARCH64_INSN_FUNCS(hint,	0xFFFFF01F, 0xD503201F)
 __AARCH64_INSN_FUNCS(br,	0xFFFFFC1F, 0xD61F0000)
+__AARCH64_INSN_FUNCS(br_auth,	0xFEFFF800, 0xD61F0800)
 __AARCH64_INSN_FUNCS(blr,	0xFFFFFC1F, 0xD63F0000)
+__AARCH64_INSN_FUNCS(blr_auth,	0xFEFFF800, 0xD63F0800)
 __AARCH64_INSN_FUNCS(ret,	0xFFFFFC1F, 0xD65F0000)
+__AARCH64_INSN_FUNCS(ret_auth,	0xFFFFFBFF, 0xD65F0BFF)
 __AARCH64_INSN_FUNCS(eret,	0xFFFFFFFF, 0xD69F03E0)
+__AARCH64_INSN_FUNCS(eret_auth,	0xFFFFFBFF, 0xD69F0BFF)
 __AARCH64_INSN_FUNCS(mrs,	0xFFF00000, 0xD5300000)
 __AARCH64_INSN_FUNCS(msr_imm,	0xFFF8F01F, 0xD500401F)
 __AARCH64_INSN_FUNCS(msr_reg,	0xFFF00000, 0xD5100000)
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index a107375005bc9c..ccc8c9e22b2581 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -176,7 +176,7 @@ bool __kprobes aarch64_insn_uses_literal(u32 insn)
 
 bool __kprobes aarch64_insn_is_branch(u32 insn)
 {
-	/* b, bl, cb*, tb*, b.cond, br, blr */
+	/* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */
 
 	return aarch64_insn_is_b(insn) ||
 		aarch64_insn_is_bl(insn) ||
@@ -185,8 +185,11 @@ bool __kprobes aarch64_insn_is_branch(u32 insn)
 		aarch64_insn_is_tbz(insn) ||
 		aarch64_insn_is_tbnz(insn) ||
 		aarch64_insn_is_ret(insn) ||
+		aarch64_insn_is_ret_auth(insn) ||
 		aarch64_insn_is_br(insn) ||
+		aarch64_insn_is_br_auth(insn) ||
 		aarch64_insn_is_blr(insn) ||
+		aarch64_insn_is_blr_auth(insn) ||
 		aarch64_insn_is_bcond(insn);
 }
 
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index 263d5fba4c8a3c..c541fb48886e3b 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -29,7 +29,8 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
 		    aarch64_insn_is_msr_imm(insn) ||
 		    aarch64_insn_is_msr_reg(insn) ||
 		    aarch64_insn_is_exception(insn) ||
-		    aarch64_insn_is_eret(insn))
+		    aarch64_insn_is_eret(insn) ||
+		    aarch64_insn_is_eret_auth(insn))
 			return false;
 
 		/*

From 4ef333b2d10680b5d966a733ed7171f72164fcd5 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Mon, 14 Sep 2020 14:06:52 +0530
Subject: [PATCH 111/262] arm64: traps: Allow force_signal_inject to pass esr
 error code

Some error signal need to pass proper ARM esr error code to userspace to
better identify the cause of the signal. So the function
force_signal_inject is extended to pass this as a parameter. The
existing code is not affected by this change.

Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20200914083656.21428-3-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/traps.h |  2 +-
 arch/arm64/kernel/fpsimd.c     |  4 ++--
 arch/arm64/kernel/traps.c      | 14 +++++++-------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index cee5928e1b7d5c..d96dc2c7c09dab 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -24,7 +24,7 @@ struct undef_hook {
 
 void register_undef_hook(struct undef_hook *hook);
 void unregister_undef_hook(struct undef_hook *hook);
-void force_signal_inject(int signal, int code, unsigned long address);
+void force_signal_inject(int signal, int code, unsigned long address, unsigned int err);
 void arm64_notify_segfault(unsigned long addr);
 void arm64_force_sig_fault(int signo, int code, void __user *addr, const char *str);
 void arm64_force_sig_mceerr(int code, void __user *addr, short lsb, const char *str);
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 55c8f3ec6705d0..77484359d44a74 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -312,7 +312,7 @@ static void fpsimd_save(void)
 				 * re-enter user with corrupt state.
 				 * There's no way to recover, so kill it:
 				 */
-				force_signal_inject(SIGKILL, SI_KERNEL, 0);
+				force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
 				return;
 			}
 
@@ -936,7 +936,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 {
 	/* Even if we chose not to use SVE, the hardware could still trap: */
 	if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
-		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
 		return;
 	}
 
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 13ebd5ca20706a..29fd00fe94f2e1 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -412,7 +412,7 @@ static int call_undef_hook(struct pt_regs *regs)
 	return fn ? fn(regs, instr) : 1;
 }
 
-void force_signal_inject(int signal, int code, unsigned long address)
+void force_signal_inject(int signal, int code, unsigned long address, unsigned int err)
 {
 	const char *desc;
 	struct pt_regs *regs = current_pt_regs();
@@ -438,7 +438,7 @@ void force_signal_inject(int signal, int code, unsigned long address)
 		signal = SIGKILL;
 	}
 
-	arm64_notify_die(desc, regs, signal, code, (void __user *)address, 0);
+	arm64_notify_die(desc, regs, signal, code, (void __user *)address, err);
 }
 
 /*
@@ -455,7 +455,7 @@ void arm64_notify_segfault(unsigned long addr)
 		code = SEGV_ACCERR;
 	mmap_read_unlock(current->mm);
 
-	force_signal_inject(SIGSEGV, code, addr);
+	force_signal_inject(SIGSEGV, code, addr, 0);
 }
 
 void do_undefinstr(struct pt_regs *regs)
@@ -468,14 +468,14 @@ void do_undefinstr(struct pt_regs *regs)
 		return;
 
 	BUG_ON(!user_mode(regs));
-	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
 }
 NOKPROBE_SYMBOL(do_undefinstr);
 
 void do_bti(struct pt_regs *regs)
 {
 	BUG_ON(!user_mode(regs));
-	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+	force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
 }
 NOKPROBE_SYMBOL(do_bti);
 
@@ -528,7 +528,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 		__user_cache_maint("ic ivau", address, ret);
 		break;
 	default:
-		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
 		return;
 	}
 
@@ -581,7 +581,7 @@ static void mrs_handler(unsigned int esr, struct pt_regs *regs)
 	sysreg = esr_sys64_to_sysreg(esr);
 
 	if (do_emulate_mrs(regs, sysreg, rt) != 0)
-		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
+		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
 }
 
 static void wfi_handler(unsigned int esr, struct pt_regs *regs)

From e16aeb072682d3dcdbdad452c974baa0d2b0c6db Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Mon, 14 Sep 2020 14:06:53 +0530
Subject: [PATCH 112/262] arm64: ptrauth: Introduce Armv8.3 pointer
 authentication enhancements

Some Armv8.3 Pointer Authentication enhancements have been introduced
which are mandatory for Armv8.6 and optional for Armv8.3. These features
are,

* ARMv8.3-PAuth2 - An enhanced PAC generation logic is added which hardens
  finding the correct PAC value of the authenticated pointer.

* ARMv8.3-FPAC - Fault is generated now when the ptrauth authentication
  instruction fails in authenticating the PAC present in the address.
  This is different from earlier case when such failures just adds an
  error code in the top byte and waits for subsequent load/store to abort.
  The ptrauth instructions which may cause this fault are autiasp, retaa
  etc.

The above features are now represented by additional configurations
for the Address Authentication cpufeature and a new ESR exception class.

The userspace fault received in the kernel due to ARMv8.3-FPAC is treated
as Illegal instruction and hence signal SIGILL is injected with ILL_ILLOPN
as the signal code. Note that this is different from earlier ARMv8.3
ptrauth where signal SIGSEGV is issued due to Pointer authentication
failures. The in-kernel PAC fault causes kernel to crash.

Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20200914083656.21428-4-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/esr.h       |  4 +++-
 arch/arm64/include/asm/exception.h |  1 +
 arch/arm64/include/asm/sysreg.h    | 24 ++++++++++++++++--------
 arch/arm64/kernel/entry-common.c   | 21 +++++++++++++++++++++
 arch/arm64/kernel/traps.c          | 12 ++++++++++++
 5 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 035003acfa876d..22c81f1edda2a1 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -35,7 +35,9 @@
 #define ESR_ELx_EC_SYS64	(0x18)
 #define ESR_ELx_EC_SVE		(0x19)
 #define ESR_ELx_EC_ERET		(0x1a)	/* EL2 only */
-/* Unallocated EC: 0x1b - 0x1E */
+/* Unallocated EC: 0x1B */
+#define ESR_ELx_EC_FPAC		(0x1C)	/* EL1 and above */
+/* Unallocated EC: 0x1D - 0x1E */
 #define ESR_ELx_EC_IMP_DEF	(0x1f)	/* EL3 only */
 #define ESR_ELx_EC_IABT_LOW	(0x20)
 #define ESR_ELx_EC_IABT_CUR	(0x21)
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 7577a754d44343..99b9383cd036d7 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -47,4 +47,5 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
 void do_cp15instr(unsigned int esr, struct pt_regs *regs);
 void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
+void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
 #endif	/* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 554a7e8ecb0746..b738bc7933697d 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -636,14 +636,22 @@
 #define ID_AA64ISAR1_APA_SHIFT		4
 #define ID_AA64ISAR1_DPB_SHIFT		0
 
-#define ID_AA64ISAR1_APA_NI		0x0
-#define ID_AA64ISAR1_APA_ARCHITECTED	0x1
-#define ID_AA64ISAR1_API_NI		0x0
-#define ID_AA64ISAR1_API_IMP_DEF	0x1
-#define ID_AA64ISAR1_GPA_NI		0x0
-#define ID_AA64ISAR1_GPA_ARCHITECTED	0x1
-#define ID_AA64ISAR1_GPI_NI		0x0
-#define ID_AA64ISAR1_GPI_IMP_DEF	0x1
+#define ID_AA64ISAR1_APA_NI			0x0
+#define ID_AA64ISAR1_APA_ARCHITECTED		0x1
+#define ID_AA64ISAR1_APA_ARCH_EPAC		0x2
+#define ID_AA64ISAR1_APA_ARCH_EPAC2		0x3
+#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC	0x4
+#define ID_AA64ISAR1_APA_ARCH_EPAC2_FPAC_CMB	0x5
+#define ID_AA64ISAR1_API_NI			0x0
+#define ID_AA64ISAR1_API_IMP_DEF		0x1
+#define ID_AA64ISAR1_API_IMP_DEF_EPAC		0x2
+#define ID_AA64ISAR1_API_IMP_DEF_EPAC2		0x3
+#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC	0x4
+#define ID_AA64ISAR1_API_IMP_DEF_EPAC2_FPAC_CMB	0x5
+#define ID_AA64ISAR1_GPA_NI			0x0
+#define ID_AA64ISAR1_GPA_ARCHITECTED		0x1
+#define ID_AA64ISAR1_GPI_NI			0x0
+#define ID_AA64ISAR1_GPI_IMP_DEF		0x1
 
 /* id_aa64pfr0 */
 #define ID_AA64PFR0_CSV3_SHIFT		60
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index d3be9dbf549007..43d4c329775f7e 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -66,6 +66,13 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr)
 }
 NOKPROBE_SYMBOL(el1_dbg);
 
+static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr)
+{
+	local_daif_inherit(regs);
+	do_ptrauth_fault(regs, esr);
+}
+NOKPROBE_SYMBOL(el1_fpac);
+
 asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
@@ -92,6 +99,9 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
 	case ESR_ELx_EC_BRK64:
 		el1_dbg(regs, esr);
 		break;
+	case ESR_ELx_EC_FPAC:
+		el1_fpac(regs, esr);
+		break;
 	default:
 		el1_inv(regs, esr);
 	}
@@ -227,6 +237,14 @@ static void notrace el0_svc(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(el0_svc);
 
+static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr)
+{
+	user_exit_irqoff();
+	local_daif_restore(DAIF_PROCCTX);
+	do_ptrauth_fault(regs, esr);
+}
+NOKPROBE_SYMBOL(el0_fpac);
+
 asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
@@ -272,6 +290,9 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
 	case ESR_ELx_EC_BRK64:
 		el0_dbg(regs, esr);
 		break;
+	case ESR_ELx_EC_FPAC:
+		el0_fpac(regs, esr);
+		break;
 	default:
 		el0_inv(regs, esr);
 	}
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 29fd00fe94f2e1..b24f81197a6825 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -479,6 +479,17 @@ void do_bti(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(do_bti);
 
+void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr)
+{
+	/*
+	 * Unexpected FPAC exception or pointer authentication failure in
+	 * the kernel: kill the task before it does any more harm.
+	 */
+	BUG_ON(!user_mode(regs));
+	force_signal_inject(SIGILL, ILL_ILLOPN, regs->pc, esr);
+}
+NOKPROBE_SYMBOL(do_ptrauth_fault);
+
 #define __user_cache_maint(insn, address, res)			\
 	if (address >= user_addr_max()) {			\
 		res = -EFAULT;					\
@@ -775,6 +786,7 @@ static const char *esr_class_str[] = {
 	[ESR_ELx_EC_SYS64]		= "MSR/MRS (AArch64)",
 	[ESR_ELx_EC_SVE]		= "SVE",
 	[ESR_ELx_EC_ERET]		= "ERET/ERETAA/ERETAB",
+	[ESR_ELx_EC_FPAC]		= "FPAC",
 	[ESR_ELx_EC_IMP_DEF]		= "EL3 IMP DEF",
 	[ESR_ELx_EC_IABT_LOW]		= "IABT (lower EL)",
 	[ESR_ELx_EC_IABT_CUR]		= "IABT (current EL)",

From ba9d1d3e3e7c34826b62498f7d6563b73c22ac13 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Mon, 14 Sep 2020 14:06:54 +0530
Subject: [PATCH 113/262] arm64: cpufeature: Modify address authentication
 cpufeature to exact

The current address authentication cpufeature levels are set as LOWER_SAFE
which is not compatible with the different configurations added for Armv8.3
ptrauth enhancements as the different levels have different behaviour and
there is no tunable to enable the lower safe versions. This is rectified
by setting those cpufeature type as EXACT.

The current cpufeature framework also does not interfere in the booting of
non-exact secondary cpus but rather marks them as tainted. As a workaround
this is fixed by replacing the generic match handler with a new handler
specific to ptrauth.

After this change, if there is any variation in ptrauth configurations in
secondary cpus from boot cpu then those mismatched cpus are parked in an
infinite loop.

Following ptrauth crash log is observed in Arm fastmodel with simulated
mismatched cpus without this fix,

 CPU features: SANITY CHECK: Unexpected variation in SYS_ID_AA64ISAR1_EL1. Boot CPU: 0x11111110211402, CPU4: 0x11111110211102
 CPU features: Unsupported CPU feature variation detected.
 GICv3: CPU4: found redistributor 100 region 0:0x000000002f180000
 CPU4: Booted secondary processor 0x0000000100 [0x410fd0f0]
 Unable to handle kernel paging request at virtual address bfff800010dadf3c
 Mem abort info:
   ESR = 0x86000004
   EC = 0x21: IABT (current EL), IL = 32 bits
   SET = 0, FnV = 0
   EA = 0, S1PTW = 0
 [bfff800010dadf3c] address between user and kernel address ranges
 Internal error: Oops: 86000004 [#1] PREEMPT SMP
 Modules linked in:
 CPU: 4 PID: 29 Comm: migration/4 Tainted: G S                5.8.0-rc4-00005-ge658591d66d1-dirty #158
 Hardware name: Foundation-v8A (DT)
 pstate: 60000089 (nZCv daIf -PAN -UAO BTYPE=--)
 pc : 0xbfff800010dadf3c
 lr : __schedule+0x2b4/0x5a8
 sp : ffff800012043d70
 x29: ffff800012043d70 x28: 0080000000000000
 x27: ffff800011cbe000 x26: ffff00087ad37580
 x25: ffff00087ad37000 x24: ffff800010de7d50
 x23: ffff800011674018 x22: 0784800010dae2a8
 x21: ffff00087ad37000 x20: ffff00087acb8000
 x19: ffff00087f742100 x18: 0000000000000030
 x17: 0000000000000000 x16: 0000000000000000
 x15: ffff800011ac1000 x14: 00000000000001bd
 x13: 0000000000000000 x12: 0000000000000000
 x11: 0000000000000000 x10: 71519a147ddfeb82
 x9 : 825d5ec0fb246314 x8 : ffff00087ad37dd8
 x7 : 0000000000000000 x6 : 00000000fffedb0e
 x5 : 00000000ffffffff x4 : 0000000000000000
 x3 : 0000000000000028 x2 : ffff80086e11e000
 x1 : ffff00087ad37000 x0 : ffff00087acdc600
 Call trace:
  0xbfff800010dadf3c
  schedule+0x78/0x110
  schedule_preempt_disabled+0x24/0x40
  __kthread_parkme+0x68/0xd0
  kthread+0x138/0x160
  ret_from_fork+0x10/0x34
 Code: bad PC value

After this fix, the mismatched CPU4 is parked as,
 CPU features: CPU4: Detected conflict for capability 39 (Address authentication (IMP DEF algorithm)), System: 1, CPU: 0
 CPU4: will not boot
 CPU4: failed to come online
 CPU4: died during early boot

[Suzuki: Introduce new matching function for address authentication]

Suggested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20200914083656.21428-5-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 44 +++++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6424584be01e6d..fdbc26c0d71262 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -197,9 +197,9 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
-		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0),
+		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_API_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
-		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0),
+		       FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_APA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
@@ -1648,11 +1648,37 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
 #endif /* CONFIG_ARM64_RAS_EXTN */
 
 #ifdef CONFIG_ARM64_PTR_AUTH
-static bool has_address_auth(const struct arm64_cpu_capabilities *entry,
-			     int __unused)
+static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope)
 {
-	return __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) ||
-	       __system_matches_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF);
+	int boot_val, sec_val;
+
+	/* We don't expect to be called with SCOPE_SYSTEM */
+	WARN_ON(scope == SCOPE_SYSTEM);
+	/*
+	 * The ptr-auth feature levels are not intercompatible with lower
+	 * levels. Hence we must match ptr-auth feature level of the secondary
+	 * CPUs with that of the boot CPU. The level of boot cpu is fetched
+	 * from the sanitised register whereas direct register read is done for
+	 * the secondary CPUs.
+	 * The sanitised feature state is guaranteed to match that of the
+	 * boot CPU as a mismatched secondary CPU is parked before it gets
+	 * a chance to update the state, with the capability.
+	 */
+	boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg),
+					       entry->field_pos, entry->sign);
+	if (scope & SCOPE_BOOT_CPU)
+		return boot_val >= entry->min_field_value;
+	/* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */
+	sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
+					      entry->field_pos, entry->sign);
+	return sec_val == boot_val;
+}
+
+static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry,
+				     int scope)
+{
+	return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) ||
+	       has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope);
 }
 
 static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
@@ -2021,7 +2047,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_APA_SHIFT,
 		.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
-		.matches = has_cpuid_feature,
+		.matches = has_address_auth_cpucap,
 	},
 	{
 		.desc = "Address authentication (IMP DEF algorithm)",
@@ -2031,12 +2057,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64ISAR1_API_SHIFT,
 		.min_field_value = ID_AA64ISAR1_API_IMP_DEF,
-		.matches = has_cpuid_feature,
+		.matches = has_address_auth_cpucap,
 	},
 	{
 		.capability = ARM64_HAS_ADDRESS_AUTH,
 		.type = ARM64_CPUCAP_BOOT_CPU_FEATURE,
-		.matches = has_address_auth,
+		.matches = has_address_auth_metacap,
 	},
 	{
 		.desc = "Generic authentication (architected algorithm)",

From 6560edca515e53bb2e7c637ab324313680a133f4 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Mon, 14 Sep 2020 14:06:55 +0530
Subject: [PATCH 114/262] arm64: kprobe: disable probe of fault prone ptrauth
 instruction

With the addition of ARMv8.3-FPAC feature, the probe of authenticate
ptrauth instructions (AUT*) may cause ptrauth fault exception in case of
authenticate failure so they cannot be safely single stepped.

Hence the probe of authenticate instructions is disallowed but the
corresponding pac ptrauth instruction (PAC*) is not affected and they can
still be probed. Also AUTH* instructions do not make sense at function
entry points so most realistic probes would be unaffected by this change.

Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Dave Martin <dave.martin@arm.com>
Link: https://lore.kernel.org/r/20200914083656.21428-6-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/insn.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index ccc8c9e22b2581..6c0de2f60ea96a 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -60,16 +60,10 @@ bool __kprobes aarch64_insn_is_steppable_hint(u32 insn)
 	case AARCH64_INSN_HINT_XPACLRI:
 	case AARCH64_INSN_HINT_PACIA_1716:
 	case AARCH64_INSN_HINT_PACIB_1716:
-	case AARCH64_INSN_HINT_AUTIA_1716:
-	case AARCH64_INSN_HINT_AUTIB_1716:
 	case AARCH64_INSN_HINT_PACIAZ:
 	case AARCH64_INSN_HINT_PACIASP:
 	case AARCH64_INSN_HINT_PACIBZ:
 	case AARCH64_INSN_HINT_PACIBSP:
-	case AARCH64_INSN_HINT_AUTIAZ:
-	case AARCH64_INSN_HINT_AUTIASP:
-	case AARCH64_INSN_HINT_AUTIBZ:
-	case AARCH64_INSN_HINT_AUTIBSP:
 	case AARCH64_INSN_HINT_BTI:
 	case AARCH64_INSN_HINT_BTIC:
 	case AARCH64_INSN_HINT_BTIJ:

From 03c9c8fad6cb5e8fdfb40287fa1cdf8ee2db0b67 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Mon, 14 Sep 2020 14:06:56 +0530
Subject: [PATCH 115/262] arm64: kprobe: clarify the comment of steppable hint
 instructions

The existing comment about steppable hint instruction is not complete
and only describes NOP instructions as steppable. As the function
aarch64_insn_is_steppable_hint allows all white-listed instruction
to be probed so the comment is updated to reflect this.

Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Dave Martin <dave.martin@arm.com>
Link: https://lore.kernel.org/r/20200914083656.21428-7-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/probes/decode-insn.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index c541fb48886e3b..104101f633b10e 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -43,8 +43,10 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
 			     != AARCH64_INSN_SPCLREG_DAIF;
 
 		/*
-		 * The HINT instruction is is problematic when single-stepping,
-		 * except for the NOP case.
+		 * The HINT instruction is steppable only if it is in whitelist
+		 * and the rest of other such instructions are blocked for
+		 * single stepping as they may cause exception or other
+		 * unintended behaviour.
 		 */
 		if (aarch64_insn_is_hint(insn))
 			return aarch64_insn_is_steppable_hint(insn);

From 2cf660eb81e93f58ae31215af67fee8499901dd9 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Mon, 14 Sep 2020 09:47:30 +1000
Subject: [PATCH 116/262] arm64/mm: Refactor {pgd, pud, pmd, pte}_ERROR()

The function __{pgd, pud, pmd, pte}_error() are introduced so that
they can be called by {pgd, pud, pmd, pte}_ERROR(). However, some
of the functions could never be called when the corresponding page
table level isn't enabled. For example, __{pud, pmd}_error() are
unused when PUD and PMD are folded to PGD.

This removes __{pgd, pud, pmd, pte}_error() and call pr_err() from
{pgd, pud, pmd, pte}_ERROR() directly, similar to what x86/powerpc
are doing. With this, the code looks a bit simplified either.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/20200913234730.23145-1-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pgtable.h | 17 ++++++++---------
 arch/arm64/kernel/traps.c        | 20 --------------------
 2 files changed, 8 insertions(+), 29 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d5d3fbe739534f..e0ab81923c3059 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -35,11 +35,6 @@
 
 extern struct page *vmemmap;
 
-extern void __pte_error(const char *file, int line, unsigned long val);
-extern void __pmd_error(const char *file, int line, unsigned long val);
-extern void __pud_error(const char *file, int line, unsigned long val);
-extern void __pgd_error(const char *file, int line, unsigned long val);
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
 
@@ -57,7 +52,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define ZERO_PAGE(vaddr)	phys_to_page(__pa_symbol(empty_zero_page))
 
-#define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
+#define pte_ERROR(e)	\
+	pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
 
 /*
  * Macros to convert between a physical address and its placement in a
@@ -541,7 +537,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 
 #if CONFIG_PGTABLE_LEVELS > 2
 
-#define pmd_ERROR(pmd)		__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
+#define pmd_ERROR(e)	\
+	pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
 
 #define pud_none(pud)		(!pud_val(pud))
 #define pud_bad(pud)		(!(pud_val(pud) & PUD_TABLE_BIT))
@@ -608,7 +605,8 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
 
 #if CONFIG_PGTABLE_LEVELS > 3
 
-#define pud_ERROR(pud)		__pud_error(__FILE__, __LINE__, pud_val(pud))
+#define pud_ERROR(e)	\
+	pr_err("%s:%d: bad pud %016llx.\n", __FILE__, __LINE__, pud_val(e))
 
 #define p4d_none(p4d)		(!p4d_val(p4d))
 #define p4d_bad(p4d)		(!(p4d_val(p4d) & 2))
@@ -667,7 +665,8 @@ static inline unsigned long p4d_page_vaddr(p4d_t p4d)
 
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
-#define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
+#define pgd_ERROR(e)	\
+	pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))
 
 #define pgd_set_fixmap(addr)	((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
 #define pgd_clear_fixmap()	clear_fixmap(FIX_PGD)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index af25cedf54e95e..1e48b869be210c 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -935,26 +935,6 @@ asmlinkage void enter_from_user_mode(void)
 }
 NOKPROBE_SYMBOL(enter_from_user_mode);
 
-void __pte_error(const char *file, int line, unsigned long val)
-{
-	pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
-}
-
-void __pmd_error(const char *file, int line, unsigned long val)
-{
-	pr_err("%s:%d: bad pmd %016lx.\n", file, line, val);
-}
-
-void __pud_error(const char *file, int line, unsigned long val)
-{
-	pr_err("%s:%d: bad pud %016lx.\n", file, line, val);
-}
-
-void __pgd_error(const char *file, int line, unsigned long val)
-{
-	pr_err("%s:%d: bad pgd %016lx.\n", file, line, val);
-}
-
 /* GENERIC_BUG traps */
 
 int is_valid_bugaddr(unsigned long addr)

From 118bb62f271a2b3e2f6ab058b6f43d2601f85480 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Mon, 14 Sep 2020 15:28:42 +0800
Subject: [PATCH 117/262] arm64: hibernate: Remove unused including
 <linux/version.h>

Remove including <linux/version.h> that don't need it.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Link: https://lore.kernel.org/r/1600068522-54499-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/hibernate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 68e14152d6e9b0..735cfd8a744fee 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -21,7 +21,6 @@
 #include <linux/sched.h>
 #include <linux/suspend.h>
 #include <linux/utsname.h>
-#include <linux/version.h>
 
 #include <asm/barrier.h>
 #include <asm/cacheflush.h>

From 13c877f4b48b943105ad9e13ba2c7a093fb694e8 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Tue, 8 Sep 2020 10:55:12 -0700
Subject: [PATCH 118/262] x86/mce: Stop mce_reign() from re-computing severity
 for every CPU

Back in commit:

  20d51a426fe9 ("x86/mce: Reuse one of the u16 padding fields in 'struct mce'")

a field was added to "struct mce" to save the computed error severity.

Make use of this in mce_reign() to avoid re-computing the severity
for every CPU.

In the case where the machine panics, one call to mce_severity() is
still needed in order to provide the correct message giving the reason
for the panic.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200908175519.14223-2-tony.luck@intel.com
---
 arch/x86/kernel/cpu/mce/core.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index a697baee9aa0b2..5b1d5f33d60bb7 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -920,7 +920,6 @@ static void mce_reign(void)
 	struct mce *m = NULL;
 	int global_worst = 0;
 	char *msg = NULL;
-	char *nmsg = NULL;
 
 	/*
 	 * This CPU is the Monarch and the other CPUs have run
@@ -928,12 +927,10 @@ static void mce_reign(void)
 	 * Grade the severity of the errors of all the CPUs.
 	 */
 	for_each_possible_cpu(cpu) {
-		int severity = mce_severity(&per_cpu(mces_seen, cpu),
-					    mca_cfg.tolerant,
-					    &nmsg, true);
-		if (severity > global_worst) {
-			msg = nmsg;
-			global_worst = severity;
+		struct mce *mtmp = &per_cpu(mces_seen, cpu);
+
+		if (mtmp->severity > global_worst) {
+			global_worst = mtmp->severity;
 			m = &per_cpu(mces_seen, cpu);
 		}
 	}
@@ -943,8 +940,11 @@ static void mce_reign(void)
 	 * This dumps all the mces in the log buffer and stops the
 	 * other CPUs.
 	 */
-	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
+	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
+		/* call mce_severity() to get "msg" for panic */
+		mce_severity(m, mca_cfg.tolerant, &msg, true);
 		mce_panic("Fatal machine check", m, msg);
+	}
 
 	/*
 	 * For UC somewhere we let the CPU who detects it handle it.

From dc0592b73715c8e84ad8ebbc50c6057d5e203aac Mon Sep 17 00:00:00 2001
From: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Date: Thu, 3 Sep 2020 18:45:31 -0500
Subject: [PATCH 119/262] x86/mce/dev-mcelog: Do not update kflags on AMD
 systems

The mcelog utility is not commonly used on AMD systems. Therefore,
errors logged only by the dev_mce_log() notifier will be missed. This
may occur if the EDAC modules are not loaded, in which case it's
preferable to print the error record by the default notifier.

However, the mce->kflags set by dev_mce_log() notifier makes the
default notifier skip over the errors assuming they are processed by
dev_mce_log().

Do not update kflags in the dev_mce_log() notifier on AMD systems.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200903234531.162484-3-Smita.KoralahalliChannabasappa@amd.com
---
 arch/x86/kernel/cpu/mce/dev-mcelog.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mce/dev-mcelog.c b/arch/x86/kernel/cpu/mce/dev-mcelog.c
index 03e51053592acb..100fbeebdc728a 100644
--- a/arch/x86/kernel/cpu/mce/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mce/dev-mcelog.c
@@ -67,7 +67,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
 unlock:
 	mutex_unlock(&mce_chrdev_read_mutex);
 
-	mce->kflags |= MCE_HANDLED_MCELOG;
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		mce->kflags |= MCE_HANDLED_MCELOG;
+
 	return NOTIFY_OK;
 }
 

From 2b694fc92a34e8c8b774a17266656d72b8cd4429 Mon Sep 17 00:00:00 2001
From: Tuan Phan <tuanphan@os.amperecomputing.com>
Date: Mon, 14 Sep 2020 11:04:16 -0700
Subject: [PATCH 120/262] perf: arm_dsu: Support DSU ACPI devices

Add support for probing device from ACPI node.
Each DSU ACPI node and its associated cpus are inside a cluster node.

Signed-off-by: Tuan Phan <tuanphan@os.amperecomputing.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/1600106656-9542-1-git-send-email-tuanphan@os.amperecomputing.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_dsu_pmu.c | 63 ++++++++++++++++++++++++++++++++++----
 1 file changed, 57 insertions(+), 6 deletions(-)

diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index 96ed93cc78e65a..98e68ed7db85f8 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -11,6 +11,7 @@
 #define DRVNAME		PMUNAME "_pmu"
 #define pr_fmt(fmt)	DRVNAME ": " fmt
 
+#include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
 #include <linux/bug.h>
@@ -603,18 +604,19 @@ static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev)
 }
 
 /**
- * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster.
+ * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster
+ * from device tree.
  */
-static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask)
+static int dsu_pmu_dt_get_cpus(struct device *dev, cpumask_t *mask)
 {
 	int i = 0, n, cpu;
 	struct device_node *cpu_node;
 
-	n = of_count_phandle_with_args(dev, "cpus", NULL);
+	n = of_count_phandle_with_args(dev->of_node, "cpus", NULL);
 	if (n <= 0)
 		return -ENODEV;
 	for (; i < n; i++) {
-		cpu_node = of_parse_phandle(dev, "cpus", i);
+		cpu_node = of_parse_phandle(dev->of_node, "cpus", i);
 		if (!cpu_node)
 			break;
 		cpu = of_cpu_node_to_id(cpu_node);
@@ -631,6 +633,36 @@ static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask)
 	return 0;
 }
 
+/**
+ * dsu_pmu_acpi_get_cpus: Get the list of CPUs in the cluster
+ * from ACPI.
+ */
+static int dsu_pmu_acpi_get_cpus(struct device *dev, cpumask_t *mask)
+{
+#ifdef CONFIG_ACPI
+	int cpu;
+
+	/*
+	 * A dsu pmu node is inside a cluster parent node along with cpu nodes.
+	 * We need to find out all cpus that have the same parent with this pmu.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct acpi_device *acpi_dev;
+		struct device *cpu_dev = get_cpu_device(cpu);
+
+		if (!cpu_dev)
+			continue;
+
+		acpi_dev = ACPI_COMPANION(cpu_dev);
+		if (acpi_dev &&
+			acpi_dev->parent == ACPI_COMPANION(dev)->parent)
+			cpumask_set_cpu(cpu, mask);
+	}
+#endif
+
+	return 0;
+}
+
 /*
  * dsu_pmu_probe_pmu: Probe the PMU details on a CPU in the cluster.
  */
@@ -676,6 +708,7 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
 {
 	int irq, rc;
 	struct dsu_pmu *dsu_pmu;
+	struct fwnode_handle *fwnode = dev_fwnode(&pdev->dev);
 	char *name;
 	static atomic_t pmu_idx = ATOMIC_INIT(-1);
 
@@ -683,7 +716,16 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
 	if (IS_ERR(dsu_pmu))
 		return PTR_ERR(dsu_pmu);
 
-	rc = dsu_pmu_dt_get_cpus(pdev->dev.of_node, &dsu_pmu->associated_cpus);
+	if (IS_ERR_OR_NULL(fwnode))
+		return -ENOENT;
+
+	if (is_of_node(fwnode))
+		rc = dsu_pmu_dt_get_cpus(&pdev->dev, &dsu_pmu->associated_cpus);
+	else if (is_acpi_device_node(fwnode))
+		rc = dsu_pmu_acpi_get_cpus(&pdev->dev, &dsu_pmu->associated_cpus);
+	else
+		return -ENOENT;
+
 	if (rc) {
 		dev_warn(&pdev->dev, "Failed to parse the CPUs\n");
 		return rc;
@@ -752,11 +794,21 @@ static const struct of_device_id dsu_pmu_of_match[] = {
 	{ .compatible = "arm,dsu-pmu", },
 	{},
 };
+MODULE_DEVICE_TABLE(of, dsu_pmu_of_match);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id dsu_pmu_acpi_match[] = {
+	{ "ARMHD500", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, dsu_pmu_acpi_match);
+#endif
 
 static struct platform_driver dsu_pmu_driver = {
 	.driver = {
 		.name	= DRVNAME,
 		.of_match_table = of_match_ptr(dsu_pmu_of_match),
+		.acpi_match_table = ACPI_PTR(dsu_pmu_acpi_match),
 		.suppress_bind_attrs = true,
 	},
 	.probe = dsu_pmu_device_probe,
@@ -826,7 +878,6 @@ static void __exit dsu_pmu_exit(void)
 module_init(dsu_pmu_init);
 module_exit(dsu_pmu_exit);
 
-MODULE_DEVICE_TABLE(of, dsu_pmu_of_match);
 MODULE_DESCRIPTION("Perf driver for ARM DynamIQ Shared Unit");
 MODULE_AUTHOR("Suzuki K Poulose <suzuki.poulose@arm.com>");
 MODULE_LICENSE("GPL v2");

From c7b6bac9c72c5fcbd6e9e12545bd3022c7f21860 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 15 Sep 2020 09:30:05 -0700
Subject: [PATCH 121/262] drm, iommu: Change type of pasid to u32

PASID is defined as a few different types in iommu including "int",
"u32", and "unsigned int". To be consistent and to match with uapi
definitions, define PASID and its variations (e.g. max PASID) as "u32".
"u32" is also shorter and a little more explicit than "unsigned int".

No PASID type change in uapi although it defines PASID as __u64 in
some places.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Link: https://lkml.kernel.org/r/1600187413-163670-2-git-send-email-fenghua.yu@intel.com
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  4 +--
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c    |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c       |  6 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h       |  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c       |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |  8 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  8 ++---
 .../gpu/drm/amd/amdkfd/cik_event_interrupt.c  |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c       |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h       |  2 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  7 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c       |  8 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_events.h       |  4 +--
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c        |  6 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_pasid.c        |  4 +--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h         | 20 ++++++------
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  2 +-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 +-
 drivers/iommu/amd/amd_iommu.h                 | 10 +++---
 drivers/iommu/amd/iommu.c                     | 31 ++++++++++---------
 drivers/iommu/amd/iommu_v2.c                  | 20 ++++++------
 drivers/iommu/intel/dmar.c                    |  7 +++--
 drivers/iommu/intel/iommu.c                   |  4 +--
 drivers/iommu/intel/pasid.c                   | 31 +++++++++----------
 drivers/iommu/intel/pasid.h                   | 24 +++++++-------
 drivers/iommu/intel/svm.c                     | 12 +++----
 drivers/iommu/iommu.c                         |  2 +-
 drivers/misc/uacce/uacce.c                    |  2 +-
 include/linux/amd-iommu.h                     |  8 ++---
 include/linux/intel-iommu.h                   | 12 +++----
 include/linux/intel-svm.h                     |  2 +-
 include/linux/iommu.h                         | 10 +++---
 include/linux/uacce.h                         |  2 +-
 38 files changed, 141 insertions(+), 141 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index ffe149aafc3933..dfef5a7e0f5a4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -207,11 +207,11 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
 	})
 
 /* GPUVM API */
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid,
 					void **vm, void **process_info,
 					struct dma_fence **ef);
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
-					struct file *filp, unsigned int pasid,
+					struct file *filp, u32 pasid,
 					void **vm, void **process_info,
 					struct dma_fence **ef);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index bf927f432506dc..ee531c3988d184 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -105,7 +105,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 	unlock_srbm(kgd);
 }
 
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
 					unsigned int vmid)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 744366c7ee85d3..4d41317b92921d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -139,7 +139,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 	unlock_srbm(kgd);
 }
 
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
 					unsigned int vmid)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index feab4cc6e83676..35917d4b50f6d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -96,7 +96,7 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 	unlock_srbm(kgd);
 }
 
-static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
 					unsigned int vmid)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 1102de76d8767b..1abfe63c80fe35 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -110,7 +110,7 @@ void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 	unlock_srbm(kgd);
 }
 
-int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
 					unsigned int vmid)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index aedf67d57449ca..ff2bc72e664614 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -26,7 +26,7 @@ void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 		uint32_t sh_mem_config,
 		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
 		uint32_t sh_mem_bases);
-int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid,
 		unsigned int vmid);
 int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a58af513c95262..d02c5c177a98e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -992,7 +992,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 	return ret;
 }
 
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid,
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, u32 pasid,
 					  void **vm, void **process_info,
 					  struct dma_fence **ef)
 {
@@ -1028,7 +1028,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasi
 }
 
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
-					   struct file *filp, unsigned int pasid,
+					   struct file *filp, u32 pasid,
 					   void **vm, void **process_info,
 					   struct dma_fence **ef)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 7521f4ab55de51..6e9a9e5dbea073 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -43,7 +43,7 @@ static DEFINE_IDA(amdgpu_pasid_ida);
 /* Helper to free pasid from a fence callback */
 struct amdgpu_pasid_cb {
 	struct dma_fence_cb cb;
-	unsigned int pasid;
+	u32 pasid;
 };
 
 /**
@@ -79,7 +79,7 @@ int amdgpu_pasid_alloc(unsigned int bits)
  * amdgpu_pasid_free - Free a PASID
  * @pasid: PASID to free
  */
-void amdgpu_pasid_free(unsigned int pasid)
+void amdgpu_pasid_free(u32 pasid)
 {
 	trace_amdgpu_pasid_freed(pasid);
 	ida_simple_remove(&amdgpu_pasid_ida, pasid);
@@ -105,7 +105,7 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence,
  * Free the pasid only after all the fences in resv are signaled.
  */
 void amdgpu_pasid_free_delayed(struct dma_resv *resv,
-			       unsigned int pasid)
+			       u32 pasid)
 {
 	struct dma_fence *fence, **fences;
 	struct amdgpu_pasid_cb *cb;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 8e58325bbca257..0c3b4fa1f93603 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -71,9 +71,9 @@ struct amdgpu_vmid_mgr {
 };
 
 int amdgpu_pasid_alloc(unsigned int bits);
-void amdgpu_pasid_free(unsigned int pasid);
+void amdgpu_pasid_free(u32 pasid);
 void amdgpu_pasid_free_delayed(struct dma_resv *resv,
-			       unsigned int pasid);
+			       u32 pasid);
 
 bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 			       struct amdgpu_vmid *id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 414548064648ed..b403b2a88ee5d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1084,7 +1084,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_bo_list *list;
 	struct amdgpu_bo *pd;
-	unsigned int pasid;
+	u32 pasid;
 	int handle;
 
 	if (!fpriv)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 71e005cf29522e..cb1d7cddebc3e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2785,7 +2785,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
  * 0 for success, error for failure.
  */
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		   int vm_context, unsigned int pasid)
+		   int vm_context, u32 pasid)
 {
 	struct amdgpu_bo_param bp;
 	struct amdgpu_bo *root;
@@ -2956,7 +2956,7 @@ static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
  * 0 for success, -errno for errors.
  */
 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			   unsigned int pasid)
+			   u32 pasid)
 {
 	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
 	int r;
@@ -3254,7 +3254,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
  * @pasid: PASID identifier for VM
  * @task_info: task_info to fill.
  */
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
+void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
 			 struct amdgpu_task_info *task_info)
 {
 	struct amdgpu_vm *vm;
@@ -3298,7 +3298,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
  * Try to gracefully handle a VM fault. Return true if the fault was handled and
  * shouldn't be reported any more.
  */
-bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 			    uint64_t addr)
 {
 	struct amdgpu_bo *root;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 770025a5e50039..ffbc0cc87ccf5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -372,8 +372,8 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 
 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		   int vm_context, unsigned int pasid);
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid);
+		   int vm_context, u32 pasid);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
 void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
@@ -430,9 +430,9 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 				  struct amdgpu_job *job);
 void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 
-void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
+void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
 			     struct amdgpu_task_info *task_info);
-bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
+bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 			    uint64_t addr);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 24b4717341172f..dcb1d89d776eec 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -91,7 +91,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
 			(const struct cik_ih_ring_entry *)ih_ring_entry;
 	uint32_t context_id = ihre->data & 0xfffffff;
 	unsigned int vmid  = (ihre->ring_id & 0x0000ff00) >> 8;
-	unsigned int pasid = (ihre->ring_id & 0xffff0000) >> 16;
+	u32 pasid = (ihre->ring_id & 0xffff0000) >> 16;
 
 	if (pasid == 0)
 		return;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 27bcc5b472f68f..b258a3dae767f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -45,7 +45,7 @@ static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
 }
 
 static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
-				unsigned int pasid, uint64_t vmid0_address,
+				u32 pasid, uint64_t vmid0_address,
 				uint32_t *packet_buff, size_t size_in_bytes)
 {
 	struct pm4__release_mem *rm_packet;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
index a04a1fe1d0d935..f9c6df1fdc5c5c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
@@ -275,7 +275,7 @@ struct kfd_dbgdev {
 };
 
 struct kfd_dbgmgr {
-	unsigned int pasid;
+	u32 pasid;
 	struct kfd_dev *dev;
 	struct kfd_dbgdev *dbgdev;
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e0e60b0d0669eb..1ec2567abbf5e5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -40,7 +40,7 @@
 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
 
 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
-					unsigned int pasid, unsigned int vmid);
+				  u32 pasid, unsigned int vmid);
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
 				enum kfd_unmap_queues_filter filter,
@@ -948,7 +948,7 @@ static int unregister_process(struct device_queue_manager *dqm,
 }
 
 static int
-set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
+set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
 			unsigned int vmid)
 {
 	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
@@ -1979,8 +1979,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm)
 	kfree(dqm);
 }
 
-int kfd_process_vm_fault(struct device_queue_manager *dqm,
-			 unsigned int pasid)
+int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
 {
 	struct kfd_process_device *pdd;
 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index a9583b95fcc13b..ba2c2ce0c55afc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -460,7 +460,7 @@ static void set_event_from_interrupt(struct kfd_process *p,
 	}
 }
 
-void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
+void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
 				uint32_t valid_id_bits)
 {
 	struct kfd_event *ev = NULL;
@@ -872,7 +872,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
 }
 
 #ifdef KFD_SUPPORT_IOMMU_V2
-void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
+void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,
 		unsigned long address, bool is_write_requested,
 		bool is_execute_requested)
 {
@@ -950,7 +950,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 }
 #endif /* KFD_SUPPORT_IOMMU_V2 */
 
-void kfd_signal_hw_exception_event(unsigned int pasid)
+void kfd_signal_hw_exception_event(u32 pasid)
 {
 	/*
 	 * Because we are called from arbitrary context (workqueue) as opposed
@@ -971,7 +971,7 @@ void kfd_signal_hw_exception_event(unsigned int pasid)
 	kfd_unref_process(p);
 }
 
-void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
+void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
 				struct kfd_vm_fault_info *info)
 {
 	struct kfd_event *ev;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index c7ac6c73af86eb..c8fe5dbdad55c5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -79,7 +79,7 @@ struct kfd_event {
 #define KFD_EVENT_TYPE_DEBUG 5
 #define KFD_EVENT_TYPE_MEMORY 8
 
-extern void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
-					uint32_t valid_id_bits);
+extern void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
+				       uint32_t valid_id_bits);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index 7c8786b9eb0aaa..e8ef3886688bac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -139,7 +139,7 @@ void kfd_iommu_unbind_process(struct kfd_process *p)
 }
 
 /* Callback for process shutdown invoked by the IOMMU driver */
-static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
+static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid)
 {
 	struct kfd_dev *dev = kfd_device_by_pci_dev(pdev);
 	struct kfd_process *p;
@@ -185,8 +185,8 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid)
 }
 
 /* This function called by IOMMU driver on PPR failure */
-static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
-		unsigned long address, u16 flags)
+static int iommu_invalid_ppr_cb(struct pci_dev *pdev, u32 pasid,
+				unsigned long address, u16 flags)
 {
 	struct kfd_dev *dev;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
index 2a07c4f2cd0dbd..af5816f51e55b8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -51,7 +51,7 @@ unsigned int kfd_get_pasid_limit(void)
 	return 1U << pasid_bits;
 }
 
-unsigned int kfd_pasid_alloc(void)
+u32 kfd_pasid_alloc(void)
 {
 	int r = amdgpu_pasid_alloc(pasid_bits);
 
@@ -63,7 +63,7 @@ unsigned int kfd_pasid_alloc(void)
 	return 0;
 }
 
-void kfd_pasid_free(unsigned int pasid)
+void kfd_pasid_free(u32 pasid)
 {
 	amdgpu_pasid_free(pasid);
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6727e9de5b8b06..922ae138ab850c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -723,7 +723,7 @@ struct kfd_process {
 	/* We want to receive a notification when the mm_struct is destroyed */
 	struct mmu_notifier mmu_notifier;
 
-	uint16_t pasid;
+	u32 pasid;
 	unsigned int doorbell_index;
 
 	/*
@@ -800,7 +800,7 @@ int kfd_process_create_wq(void);
 void kfd_process_destroy_wq(void);
 struct kfd_process *kfd_create_process(struct file *filep);
 struct kfd_process *kfd_get_process(const struct task_struct *);
-struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
 void kfd_unref_process(struct kfd_process *p);
 int kfd_process_evict_queues(struct kfd_process *p);
@@ -841,8 +841,8 @@ int kfd_pasid_init(void);
 void kfd_pasid_exit(void);
 bool kfd_set_pasid_limit(unsigned int new_limit);
 unsigned int kfd_get_pasid_limit(void);
-unsigned int kfd_pasid_alloc(void);
-void kfd_pasid_free(unsigned int pasid);
+u32 kfd_pasid_alloc(void);
+void kfd_pasid_free(u32 pasid);
 
 /* Doorbells */
 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
@@ -927,7 +927,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm);
 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 					enum kfd_queue_type type);
 void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
-int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
+int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid);
 
 /* Process Queue Manager */
 struct process_queue_node {
@@ -1049,12 +1049,12 @@ int kfd_wait_on_events(struct kfd_process *p,
 		       uint32_t num_events, void __user *data,
 		       bool all, uint32_t user_timeout_ms,
 		       uint32_t *wait_result);
-void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
+void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id,
 				uint32_t valid_id_bits);
 void kfd_signal_iommu_event(struct kfd_dev *dev,
-		unsigned int pasid, unsigned long address,
-		bool is_write_requested, bool is_execute_requested);
-void kfd_signal_hw_exception_event(unsigned int pasid);
+			    u32 pasid, unsigned long address,
+			    bool is_write_requested, bool is_execute_requested);
+void kfd_signal_hw_exception_event(u32 pasid);
 int kfd_set_event(struct kfd_process *p, uint32_t event_id);
 int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
 int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
@@ -1065,7 +1065,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 		     uint64_t *event_page_offset, uint32_t *event_slot_index);
 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
 
-void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
+void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
 				struct kfd_vm_fault_info *info);
 
 void kfd_signal_reset_event(struct kfd_dev *dev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 40695d52e9a8df..627793029033e7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1306,7 +1306,7 @@ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
 }
 
 /* This increments the process->ref counter. */
-struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
+struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
 {
 	struct kfd_process *p, *ret_p = NULL;
 	unsigned int temp;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index a3c238c39ef572..301de493377af2 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -226,7 +226,7 @@ struct kfd2kgd_calls {
 			uint32_t sh_mem_config,	uint32_t sh_mem_ape1_base,
 			uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
 
-	int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid,
+	int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, u32 pasid,
 					unsigned int vmid);
 
 	int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 57309716fd180a..030ee90197a147 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -45,12 +45,12 @@ extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
 extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
 extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
 extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
-extern int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
 				u64 address);
-extern int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid);
-extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
+extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
 				     unsigned long cr3);
-extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid);
+extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
 extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
 
 #ifdef CONFIG_IRQ_REMAP
@@ -66,7 +66,7 @@ static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 #define PPR_INVALID			0x1
 #define PPR_FAILURE			0xf
 
-extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+extern int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
 				  int status, int tag);
 
 static inline bool is_rd890_iommu(struct pci_dev *pdev)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 07ae8b93887e56..a21c717e107d6a 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -513,10 +513,11 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 {
 	struct device *dev = iommu->iommu.dev;
-	int type, devid, pasid, flags, tag;
+	int type, devid, flags, tag;
 	volatile u32 *event = __evt;
 	int count = 0;
 	u64 address;
+	u32 pasid;
 
 retry:
 	type    = (event[1] >> EVENT_TYPE_SHIFT)  & EVENT_TYPE_MASK;
@@ -909,7 +910,7 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
 		cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
 }
 
-static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
+static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, u32 pasid,
 				  u64 address, bool size)
 {
 	memset(cmd, 0, sizeof(*cmd));
@@ -927,7 +928,7 @@ static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
 	CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
 }
 
-static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
+static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid,
 				  int qdep, u64 address, bool size)
 {
 	memset(cmd, 0, sizeof(*cmd));
@@ -947,7 +948,7 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
 	CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
 }
 
-static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
+static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid,
 			       int status, int tag, bool gn)
 {
 	memset(cmd, 0, sizeof(*cmd));
@@ -2786,7 +2787,7 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
 }
 EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
 
-static int __flush_pasid(struct protection_domain *domain, int pasid,
+static int __flush_pasid(struct protection_domain *domain, u32 pasid,
 			 u64 address, bool size)
 {
 	struct iommu_dev_data *dev_data;
@@ -2847,13 +2848,13 @@ static int __flush_pasid(struct protection_domain *domain, int pasid,
 	return ret;
 }
 
-static int __amd_iommu_flush_page(struct protection_domain *domain, int pasid,
+static int __amd_iommu_flush_page(struct protection_domain *domain, u32 pasid,
 				  u64 address)
 {
 	return __flush_pasid(domain, pasid, address, false);
 }
 
-int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
+int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
 			 u64 address)
 {
 	struct protection_domain *domain = to_pdomain(dom);
@@ -2868,13 +2869,13 @@ int amd_iommu_flush_page(struct iommu_domain *dom, int pasid,
 }
 EXPORT_SYMBOL(amd_iommu_flush_page);
 
-static int __amd_iommu_flush_tlb(struct protection_domain *domain, int pasid)
+static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid)
 {
 	return __flush_pasid(domain, pasid, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
 			     true);
 }
 
-int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
+int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid)
 {
 	struct protection_domain *domain = to_pdomain(dom);
 	unsigned long flags;
@@ -2888,7 +2889,7 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, int pasid)
 }
 EXPORT_SYMBOL(amd_iommu_flush_tlb);
 
-static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
+static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
 {
 	int index;
 	u64 *pte;
@@ -2920,7 +2921,7 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc)
 	return pte;
 }
 
-static int __set_gcr3(struct protection_domain *domain, int pasid,
+static int __set_gcr3(struct protection_domain *domain, u32 pasid,
 		      unsigned long cr3)
 {
 	struct domain_pgtable pgtable;
@@ -2939,7 +2940,7 @@ static int __set_gcr3(struct protection_domain *domain, int pasid,
 	return __amd_iommu_flush_tlb(domain, pasid);
 }
 
-static int __clear_gcr3(struct protection_domain *domain, int pasid)
+static int __clear_gcr3(struct protection_domain *domain, u32 pasid)
 {
 	struct domain_pgtable pgtable;
 	u64 *pte;
@@ -2957,7 +2958,7 @@ static int __clear_gcr3(struct protection_domain *domain, int pasid)
 	return __amd_iommu_flush_tlb(domain, pasid);
 }
 
-int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
+int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
 			      unsigned long cr3)
 {
 	struct protection_domain *domain = to_pdomain(dom);
@@ -2972,7 +2973,7 @@ int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid,
 }
 EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
 
-int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
+int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
 {
 	struct protection_domain *domain = to_pdomain(dom);
 	unsigned long flags;
@@ -2986,7 +2987,7 @@ int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid)
 }
 EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
 
-int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
+int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
 			   int status, int tag)
 {
 	struct iommu_dev_data *dev_data;
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index 0d175aed1d92fb..5ecc0bc608ec6f 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -40,7 +40,7 @@ struct pasid_state {
 	struct mmu_notifier mn;                 /* mmu_notifier handle */
 	struct pri_queue pri[PRI_QUEUE_SIZE];	/* PRI tag states */
 	struct device_state *device_state;	/* Link to our device_state */
-	int pasid;				/* PASID index */
+	u32 pasid;				/* PASID index */
 	bool invalid;				/* Used during setup and
 						   teardown of the pasid */
 	spinlock_t lock;			/* Protect pri_queues and
@@ -70,7 +70,7 @@ struct fault {
 	struct mm_struct *mm;
 	u64 address;
 	u16 devid;
-	u16 pasid;
+	u32 pasid;
 	u16 tag;
 	u16 finish;
 	u16 flags;
@@ -150,7 +150,7 @@ static void put_device_state(struct device_state *dev_state)
 
 /* Must be called under dev_state->lock */
 static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
-						  int pasid, bool alloc)
+						  u32 pasid, bool alloc)
 {
 	struct pasid_state **root, **ptr;
 	int level, index;
@@ -184,7 +184,7 @@ static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state
 
 static int set_pasid_state(struct device_state *dev_state,
 			   struct pasid_state *pasid_state,
-			   int pasid)
+			   u32 pasid)
 {
 	struct pasid_state **ptr;
 	unsigned long flags;
@@ -211,7 +211,7 @@ static int set_pasid_state(struct device_state *dev_state,
 	return ret;
 }
 
-static void clear_pasid_state(struct device_state *dev_state, int pasid)
+static void clear_pasid_state(struct device_state *dev_state, u32 pasid)
 {
 	struct pasid_state **ptr;
 	unsigned long flags;
@@ -229,7 +229,7 @@ static void clear_pasid_state(struct device_state *dev_state, int pasid)
 }
 
 static struct pasid_state *get_pasid_state(struct device_state *dev_state,
-					   int pasid)
+					   u32 pasid)
 {
 	struct pasid_state **ptr, *ret = NULL;
 	unsigned long flags;
@@ -594,7 +594,7 @@ static struct notifier_block ppr_nb = {
 	.notifier_call = ppr_notifier,
 };
 
-int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
 			 struct task_struct *task)
 {
 	struct pasid_state *pasid_state;
@@ -615,7 +615,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
 		return -EINVAL;
 
 	ret = -EINVAL;
-	if (pasid < 0 || pasid >= dev_state->max_pasids)
+	if (pasid >= dev_state->max_pasids)
 		goto out;
 
 	ret = -ENOMEM;
@@ -679,7 +679,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
 }
 EXPORT_SYMBOL(amd_iommu_bind_pasid);
 
-void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
+void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid)
 {
 	struct pasid_state *pasid_state;
 	struct device_state *dev_state;
@@ -695,7 +695,7 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
 	if (dev_state == NULL)
 		return;
 
-	if (pasid < 0 || pasid >= dev_state->max_pasids)
+	if (pasid >= dev_state->max_pasids)
 		goto out;
 
 	pasid_state = get_pasid_state(dev_state, pasid);
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 93e6345f3414f0..e4cfa7355fc6dd 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1482,7 +1482,7 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
 }
 
 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
-			  u64 granu, int pasid)
+			  u64 granu, u32 pasid)
 {
 	struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
 
@@ -1796,7 +1796,7 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
 }
 
 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
-		u8 fault_reason, int pasid, u16 source_id,
+		u8 fault_reason, u32 pasid, u16 source_id,
 		unsigned long long addr)
 {
 	const char *reason;
@@ -1846,7 +1846,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
 		u8 fault_reason;
 		u16 source_id;
 		u64 guest_addr;
-		int type, pasid;
+		u32 pasid;
+		int type;
 		u32 data;
 		bool pasid_present;
 
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 87b17bac04c27c..44bb6fda4755fb 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2527,7 +2527,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
 static int domain_setup_first_level(struct intel_iommu *iommu,
 				    struct dmar_domain *domain,
 				    struct device *dev,
-				    int pasid)
+				    u32 pasid)
 {
 	int flags = PASID_FLAG_SUPERVISOR_MODE;
 	struct dma_pte *pgd = domain->pgd;
@@ -5173,7 +5173,7 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
 		return -ENODEV;
 
 	if (domain->default_pasid <= 0) {
-		int pasid;
+		u32 pasid;
 
 		/* No private data needed for the default pasid */
 		pasid = ioasid_alloc(NULL, PASID_MIN,
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index e6faedf42fd418..b92af83b79bdcf 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -27,7 +27,7 @@
 static DEFINE_SPINLOCK(pasid_lock);
 u32 intel_pasid_max_id = PASID_MAX;
 
-int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid)
+int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid)
 {
 	unsigned long flags;
 	u8 status_code;
@@ -58,7 +58,7 @@ int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid)
 	return ret;
 }
 
-void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid)
+void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid)
 {
 	unsigned long flags;
 	u8 status_code;
@@ -146,7 +146,7 @@ int intel_pasid_alloc_table(struct device *dev)
 	struct pasid_table *pasid_table;
 	struct pasid_table_opaque data;
 	struct page *pages;
-	int max_pasid = 0;
+	u32 max_pasid = 0;
 	int ret, order;
 	int size;
 
@@ -168,7 +168,7 @@ int intel_pasid_alloc_table(struct device *dev)
 	INIT_LIST_HEAD(&pasid_table->dev);
 
 	if (info->pasid_supported)
-		max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
+		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
 				  intel_pasid_max_id);
 
 	size = max_pasid >> (PASID_PDE_SHIFT - 3);
@@ -242,7 +242,7 @@ int intel_pasid_get_dev_max_id(struct device *dev)
 	return info->pasid_table->max_pasid;
 }
 
-struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
+struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
 {
 	struct device_domain_info *info;
 	struct pasid_table *pasid_table;
@@ -251,8 +251,7 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
 	int dir_index, index;
 
 	pasid_table = intel_pasid_get_table(dev);
-	if (WARN_ON(!pasid_table || pasid < 0 ||
-		    pasid >= intel_pasid_get_dev_max_id(dev)))
+	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
 		return NULL;
 
 	dir = pasid_table->table;
@@ -305,7 +304,7 @@ static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
 }
 
 static void
-intel_pasid_clear_entry(struct device *dev, int pasid, bool fault_ignore)
+intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
 {
 	struct pasid_entry *pe;
 
@@ -444,7 +443,7 @@ pasid_set_eafe(struct pasid_entry *pe)
 
 static void
 pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
-				    u16 did, int pasid)
+				    u16 did, u32 pasid)
 {
 	struct qi_desc desc;
 
@@ -473,7 +472,7 @@ iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
 
 static void
 devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
-			       struct device *dev, int pasid)
+			       struct device *dev, u32 pasid)
 {
 	struct device_domain_info *info;
 	u16 sid, qdep, pfsid;
@@ -499,7 +498,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
 }
 
 void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
-				 int pasid, bool fault_ignore)
+				 u32 pasid, bool fault_ignore)
 {
 	struct pasid_entry *pte;
 	u16 did;
@@ -524,7 +523,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
 
 static void pasid_flush_caches(struct intel_iommu *iommu,
 				struct pasid_entry *pte,
-				int pasid, u16 did)
+			       u32 pasid, u16 did)
 {
 	if (!ecap_coherent(iommu->ecap))
 		clflush_cache_range(pte, sizeof(*pte));
@@ -543,7 +542,7 @@ static void pasid_flush_caches(struct intel_iommu *iommu,
  */
 int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 				  struct device *dev, pgd_t *pgd,
-				  int pasid, u16 did, int flags)
+				  u32 pasid, u16 did, int flags)
 {
 	struct pasid_entry *pte;
 
@@ -616,7 +615,7 @@ static inline int iommu_skip_agaw(struct dmar_domain *domain,
  */
 int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 				   struct dmar_domain *domain,
-				   struct device *dev, int pasid)
+				   struct device *dev, u32 pasid)
 {
 	struct pasid_entry *pte;
 	struct dma_pte *pgd;
@@ -674,7 +673,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
  */
 int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
 				   struct dmar_domain *domain,
-				   struct device *dev, int pasid)
+				   struct device *dev, u32 pasid)
 {
 	u16 did = FLPT_DEFAULT_DID;
 	struct pasid_entry *pte;
@@ -760,7 +759,7 @@ intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte,
  * @addr_width: Address width of the first level (guest)
  */
 int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
-			     pgd_t *gpgd, int pasid,
+			     pgd_t *gpgd, u32 pasid,
 			     struct iommu_gpasid_bind_data_vtd *pasid_data,
 			     struct dmar_domain *domain, int addr_width)
 {
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index c9850766c3a9f0..97dfcffbf495a9 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -72,7 +72,7 @@ struct pasid_entry {
 struct pasid_table {
 	void			*table;		/* pasid table pointer */
 	int			order;		/* page order of pasid table */
-	int			max_pasid;	/* max pasid */
+	u32			max_pasid;	/* max pasid */
 	struct list_head	dev;		/* device list */
 };
 
@@ -98,31 +98,31 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte)
 	return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT;
 }
 
-extern u32 intel_pasid_max_id;
+extern unsigned int intel_pasid_max_id;
 int intel_pasid_alloc_id(void *ptr, int start, int end, gfp_t gfp);
-void intel_pasid_free_id(int pasid);
-void *intel_pasid_lookup_id(int pasid);
+void intel_pasid_free_id(u32 pasid);
+void *intel_pasid_lookup_id(u32 pasid);
 int intel_pasid_alloc_table(struct device *dev);
 void intel_pasid_free_table(struct device *dev);
 struct pasid_table *intel_pasid_get_table(struct device *dev);
 int intel_pasid_get_dev_max_id(struct device *dev);
-struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid);
+struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid);
 int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 				  struct device *dev, pgd_t *pgd,
-				  int pasid, u16 did, int flags);
+				  u32 pasid, u16 did, int flags);
 int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 				   struct dmar_domain *domain,
-				   struct device *dev, int pasid);
+				   struct device *dev, u32 pasid);
 int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
 				   struct dmar_domain *domain,
-				   struct device *dev, int pasid);
+				   struct device *dev, u32 pasid);
 int intel_pasid_setup_nested(struct intel_iommu *iommu,
-			     struct device *dev, pgd_t *pgd, int pasid,
+			     struct device *dev, pgd_t *pgd, u32 pasid,
 			     struct iommu_gpasid_bind_data_vtd *pasid_data,
 			     struct dmar_domain *domain, int addr_width);
 void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
-				 struct device *dev, int pasid,
+				 struct device *dev, u32 pasid,
 				 bool fault_ignore);
-int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid);
-void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid);
+int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid);
+void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid);
 #endif /* __INTEL_PASID_H */
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 95c3164a2302f0..e78a74a9c1cf7a 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -23,7 +23,7 @@
 #include "pasid.h"
 
 static irqreturn_t prq_event_thread(int irq, void *d);
-static void intel_svm_drain_prq(struct device *dev, int pasid);
+static void intel_svm_drain_prq(struct device *dev, u32 pasid);
 
 #define PRQ_ORDER 0
 
@@ -399,7 +399,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 	return ret;
 }
 
-int intel_svm_unbind_gpasid(struct device *dev, int pasid)
+int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
 {
 	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
 	struct intel_svm_dev *sdev;
@@ -620,7 +620,7 @@ intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
 }
 
 /* Caller must hold pasid_mutex */
-static int intel_svm_unbind_mm(struct device *dev, int pasid)
+static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
 {
 	struct intel_svm_dev *sdev;
 	struct intel_iommu *iommu;
@@ -739,7 +739,7 @@ static bool is_canonical_address(u64 addr)
  * described in VT-d spec CH7.10 to drain all page requests and page
  * responses pending in the hardware.
  */
-static void intel_svm_drain_prq(struct device *dev, int pasid)
+static void intel_svm_drain_prq(struct device *dev, u32 pasid)
 {
 	struct device_domain_info *info;
 	struct dmar_domain *domain;
@@ -1067,10 +1067,10 @@ void intel_svm_unbind(struct iommu_sva *sva)
 	mutex_unlock(&pasid_mutex);
 }
 
-int intel_svm_get_pasid(struct iommu_sva *sva)
+u32 intel_svm_get_pasid(struct iommu_sva *sva)
 {
 	struct intel_svm_dev *sdev;
-	int pasid;
+	u32 pasid;
 
 	mutex_lock(&pasid_mutex);
 	sdev = to_intel_svm_dev(sva);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 609bd25bf154b5..0e4fbdc0f5e588 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2839,7 +2839,7 @@ void iommu_sva_unbind_device(struct iommu_sva *handle)
 }
 EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
 
-int iommu_sva_get_pasid(struct iommu_sva *handle)
+u32 iommu_sva_get_pasid(struct iommu_sva *handle)
 {
 	const struct iommu_ops *ops = handle->dev->bus->iommu_ops;
 
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index a5b8dab80c76be..4cb7a5b194677d 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -93,7 +93,7 @@ static long uacce_fops_compat_ioctl(struct file *filep,
 
 static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q)
 {
-	int pasid;
+	u32 pasid;
 	struct iommu_sva *handle;
 
 	if (!(uacce->flags & UACCE_DEV_SVA))
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 21e950e4ab623e..450717299928bb 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -76,7 +76,7 @@ extern void amd_iommu_free_device(struct pci_dev *pdev);
  *
  * The function returns 0 on success or a negative value on error.
  */
-extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
+extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
 				struct task_struct *task);
 
 /**
@@ -88,7 +88,7 @@ extern int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
  * When this function returns the device is no longer using the PASID
  * and the PASID is no longer bound to its task.
  */
-extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
+extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid);
 
 /**
  * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed
@@ -114,7 +114,7 @@ extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid);
 #define AMD_IOMMU_INV_PRI_RSP_FAIL	2
 
 typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
-					int pasid,
+					u32 pasid,
 					unsigned long address,
 					u16);
 
@@ -166,7 +166,7 @@ extern int amd_iommu_device_info(struct pci_dev *pdev,
  * @cb: The call-back function
  */
 
-typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, int pasid);
+typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid);
 
 extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
 					   amd_iommu_invalidate_ctx cb);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index b1ed2f25f7c0de..7322073f62d0ea 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -549,7 +549,7 @@ struct dmar_domain {
 					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
 	u64		max_addr;	/* maximum mapped address */
 
-	int		default_pasid;	/*
+	u32		default_pasid;	/*
 					 * The default pasid used for non-SVM
 					 * traffic on mediated devices.
 					 */
@@ -708,7 +708,7 @@ void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
 			      u32 pasid, u16 qdep, u64 addr,
 			      unsigned int size_order);
 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu,
-			  int pasid);
+			  u32 pasid);
 
 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
 		   unsigned int count, unsigned long options);
@@ -737,11 +737,11 @@ extern int intel_svm_enable_prq(struct intel_iommu *iommu);
 extern int intel_svm_finish_prq(struct intel_iommu *iommu);
 int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
 			  struct iommu_gpasid_bind_data *data);
-int intel_svm_unbind_gpasid(struct device *dev, int pasid);
+int intel_svm_unbind_gpasid(struct device *dev, u32 pasid);
 struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
 				 void *drvdata);
 void intel_svm_unbind(struct iommu_sva *handle);
-int intel_svm_get_pasid(struct iommu_sva *handle);
+u32 intel_svm_get_pasid(struct iommu_sva *handle);
 int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
 			    struct iommu_page_response *msg);
 
@@ -753,7 +753,7 @@ struct intel_svm_dev {
 	struct device *dev;
 	struct svm_dev_ops *ops;
 	struct iommu_sva sva;
-	int pasid;
+	u32 pasid;
 	int users;
 	u16 did;
 	u16 dev_iotlb:1;
@@ -766,7 +766,7 @@ struct intel_svm {
 
 	struct intel_iommu *iommu;
 	int flags;
-	int pasid;
+	u32 pasid;
 	int gpasid; /* In case that guest PASID is different from host PASID */
 	struct list_head devs;
 	struct list_head list;
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index c9e7e601950d6e..39d368a810b856 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -11,7 +11,7 @@
 struct device;
 
 struct svm_dev_ops {
-	void (*fault_cb)(struct device *dev, int pasid, u64 address,
+	void (*fault_cb)(struct device *dev, u32 pasid, u64 address,
 			 void *private, int rwxp, int response);
 };
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index fee209efb7568f..e57e819aaf2e76 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -286,7 +286,7 @@ struct iommu_ops {
 	struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
 				      void *drvdata);
 	void (*sva_unbind)(struct iommu_sva *handle);
-	int (*sva_get_pasid)(struct iommu_sva *handle);
+	u32 (*sva_get_pasid)(struct iommu_sva *handle);
 
 	int (*page_response)(struct device *dev,
 			     struct iommu_fault_event *evt,
@@ -296,7 +296,7 @@ struct iommu_ops {
 	int (*sva_bind_gpasid)(struct iommu_domain *domain,
 			struct device *dev, struct iommu_gpasid_bind_data *data);
 
-	int (*sva_unbind_gpasid)(struct device *dev, int pasid);
+	int (*sva_unbind_gpasid)(struct device *dev, u32 pasid);
 
 	int (*def_domain_type)(struct device *dev);
 
@@ -634,7 +634,7 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev,
 					struct mm_struct *mm,
 					void *drvdata);
 void iommu_sva_unbind_device(struct iommu_sva *handle);
-int iommu_sva_get_pasid(struct iommu_sva *handle);
+u32 iommu_sva_get_pasid(struct iommu_sva *handle);
 
 #else /* CONFIG_IOMMU_API */
 
@@ -1027,7 +1027,7 @@ static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
 {
 }
 
-static inline int iommu_sva_get_pasid(struct iommu_sva *handle)
+static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
 {
 	return IOMMU_PASID_INVALID;
 }
@@ -1046,7 +1046,7 @@ static inline int iommu_sva_bind_gpasid(struct iommu_domain *domain,
 }
 
 static inline int iommu_sva_unbind_gpasid(struct iommu_domain *domain,
-					   struct device *dev, int pasid)
+					   struct device *dev, u32 pasid)
 {
 	return -ENODEV;
 }
diff --git a/include/linux/uacce.h b/include/linux/uacce.h
index 454c2f6672d791..48e319f402751b 100644
--- a/include/linux/uacce.h
+++ b/include/linux/uacce.h
@@ -81,7 +81,7 @@ struct uacce_queue {
 	struct list_head list;
 	struct uacce_qfile_region *qfrs[UACCE_MAX_REGION];
 	enum uacce_q_state state;
-	int pasid;
+	u32 pasid;
 	struct iommu_sva *handle;
 };
 

From 2a5054c6e7b16906984ac36a7363ca46b8d99ade Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 15 Sep 2020 09:30:06 -0700
Subject: [PATCH 122/262] iommu/vt-d: Change flags type to unsigned int in
 binding mm

"flags" passed to intel_svm_bind_mm() is a bit mask and should be
defined as "unsigned int" instead of "int".

Change its type to "unsigned int".

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
Link: https://lkml.kernel.org/r/1600187413-163670-3-git-send-email-fenghua.yu@intel.com
---
 drivers/iommu/intel/svm.c   | 7 ++++---
 include/linux/intel-iommu.h | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index e78a74a9c1cf7a..fc90a079e22846 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -446,7 +446,8 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
 
 /* Caller must hold pasid_mutex, mm reference */
 static int
-intel_svm_bind_mm(struct device *dev, int flags, struct svm_dev_ops *ops,
+intel_svm_bind_mm(struct device *dev, unsigned int flags,
+		  struct svm_dev_ops *ops,
 		  struct mm_struct *mm, struct intel_svm_dev **sd)
 {
 	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
@@ -1033,7 +1034,7 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
 {
 	struct iommu_sva *sva = ERR_PTR(-EINVAL);
 	struct intel_svm_dev *sdev = NULL;
-	int flags = 0;
+	unsigned int flags = 0;
 	int ret;
 
 	/*
@@ -1042,7 +1043,7 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
 	 * and intel_svm etc.
 	 */
 	if (drvdata)
-		flags = *(int *)drvdata;
+		flags = *(unsigned int *)drvdata;
 	mutex_lock(&pasid_mutex);
 	ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
 	if (ret)
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 7322073f62d0ea..9c3e8337442a2a 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -765,7 +765,7 @@ struct intel_svm {
 	struct mm_struct *mm;
 
 	struct intel_iommu *iommu;
-	int flags;
+	unsigned int flags;
 	u32 pasid;
 	int gpasid; /* In case that guest PASID is different from host PASID */
 	struct list_head devs;

From 4e7b11567d946ebe14a3d10b697b078971a9da89 Mon Sep 17 00:00:00 2001
From: Ashok Raj <ashok.raj@intel.com>
Date: Tue, 15 Sep 2020 09:30:07 -0700
Subject: [PATCH 123/262] Documentation/x86: Add documentation for SVA (Shared
 Virtual Addressing)

ENQCMD and Data Streaming Accelerator (DSA) and all of their associated
features are a complicated stack with lots of interconnected pieces.
This documentation provides a big picture overview for all of the
features.

Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1600187413-163670-4-git-send-email-fenghua.yu@intel.com
---
 Documentation/x86/index.rst |   1 +
 Documentation/x86/sva.rst   | 257 ++++++++++++++++++++++++++++++++++++
 2 files changed, 258 insertions(+)
 create mode 100644 Documentation/x86/sva.rst

diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 265d9e9a093b8d..e5d5ff096685f2 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -30,3 +30,4 @@ x86-specific Documentation
    usb-legacy-support
    i386/index
    x86_64/index
+   sva
diff --git a/Documentation/x86/sva.rst b/Documentation/x86/sva.rst
new file mode 100644
index 00000000000000..076efd51ef1fe9
--- /dev/null
+++ b/Documentation/x86/sva.rst
@@ -0,0 +1,257 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+Shared Virtual Addressing (SVA) with ENQCMD
+===========================================
+
+Background
+==========
+
+Shared Virtual Addressing (SVA) allows the processor and device to use the
+same virtual addresses avoiding the need for software to translate virtual
+addresses to physical addresses. SVA is what PCIe calls Shared Virtual
+Memory (SVM).
+
+In addition to the convenience of using application virtual addresses
+by the device, it also doesn't require pinning pages for DMA.
+PCIe Address Translation Services (ATS) along with Page Request Interface
+(PRI) allow devices to function much the same way as the CPU handling
+application page-faults. For more information please refer to the PCIe
+specification Chapter 10: ATS Specification.
+
+Use of SVA requires IOMMU support in the platform. IOMMU is also
+required to support the PCIe features ATS and PRI. ATS allows devices
+to cache translations for virtual addresses. The IOMMU driver uses the
+mmu_notifier() support to keep the device TLB cache and the CPU cache in
+sync. When an ATS lookup fails for a virtual address, the device should
+use the PRI in order to request the virtual address to be paged into the
+CPU page tables. The device must use ATS again in order the fetch the
+translation before use.
+
+Shared Hardware Workqueues
+==========================
+
+Unlike Single Root I/O Virtualization (SR-IOV), Scalable IOV (SIOV) permits
+the use of Shared Work Queues (SWQ) by both applications and Virtual
+Machines (VM's). This allows better hardware utilization vs. hard
+partitioning resources that could result in under utilization. In order to
+allow the hardware to distinguish the context for which work is being
+executed in the hardware by SWQ interface, SIOV uses Process Address Space
+ID (PASID), which is a 20-bit number defined by the PCIe SIG.
+
+PASID value is encoded in all transactions from the device. This allows the
+IOMMU to track I/O on a per-PASID granularity in addition to using the PCIe
+Resource Identifier (RID) which is the Bus/Device/Function.
+
+
+ENQCMD
+======
+
+ENQCMD is a new instruction on Intel platforms that atomically submits a
+work descriptor to a device. The descriptor includes the operation to be
+performed, virtual addresses of all parameters, virtual address of a completion
+record, and the PASID (process address space ID) of the current process.
+
+ENQCMD works with non-posted semantics and carries a status back if the
+command was accepted by hardware. This allows the submitter to know if the
+submission needs to be retried or other device specific mechanisms to
+implement fairness or ensure forward progress should be provided.
+
+ENQCMD is the glue that ensures applications can directly submit commands
+to the hardware and also permits hardware to be aware of application context
+to perform I/O operations via use of PASID.
+
+Process Address Space Tagging
+=============================
+
+A new thread-scoped MSR (IA32_PASID) provides the connection between
+user processes and the rest of the hardware. When an application first
+accesses an SVA-capable device, this MSR is initialized with a newly
+allocated PASID. The driver for the device calls an IOMMU-specific API
+that sets up the routing for DMA and page-requests.
+
+For example, the Intel Data Streaming Accelerator (DSA) uses
+iommu_sva_bind_device(), which will do the following:
+
+- Allocate the PASID, and program the process page-table (%cr3 register) in the
+  PASID context entries.
+- Register for mmu_notifier() to track any page-table invalidations to keep
+  the device TLB in sync. For example, when a page-table entry is invalidated,
+  the IOMMU propagates the invalidation to the device TLB. This will force any
+  future access by the device to this virtual address to participate in
+  ATS. If the IOMMU responds with proper response that a page is not
+  present, the device would request the page to be paged in via the PCIe PRI
+  protocol before performing I/O.
+
+This MSR is managed with the XSAVE feature set as "supervisor state" to
+ensure the MSR is updated during context switch.
+
+PASID Management
+================
+
+The kernel must allocate a PASID on behalf of each process which will use
+ENQCMD and program it into the new MSR to communicate the process identity to
+platform hardware.  ENQCMD uses the PASID stored in this MSR to tag requests
+from this process.  When a user submits a work descriptor to a device using the
+ENQCMD instruction, the PASID field in the descriptor is auto-filled with the
+value from MSR_IA32_PASID. Requests for DMA from the device are also tagged
+with the same PASID. The platform IOMMU uses the PASID in the transaction to
+perform address translation. The IOMMU APIs setup the corresponding PASID
+entry in IOMMU with the process address used by the CPU (e.g. %cr3 register in
+x86).
+
+The MSR must be configured on each logical CPU before any application
+thread can interact with a device. Threads that belong to the same
+process share the same page tables, thus the same MSR value.
+
+PASID is cleared when a process is created. The PASID allocation and MSR
+programming may occur long after a process and its threads have been created.
+One thread must call iommu_sva_bind_device() to allocate the PASID for the
+process. If a thread uses ENQCMD without the MSR first being populated, a #GP
+will be raised. The kernel will update the PASID MSR with the PASID for all
+threads in the process. A single process PASID can be used simultaneously
+with multiple devices since they all share the same address space.
+
+One thread can call iommu_sva_unbind_device() to free the allocated PASID.
+The kernel will clear the PASID MSR for all threads belonging to the process.
+
+New threads inherit the MSR value from the parent.
+
+Relationships
+=============
+
+ * Each process has many threads, but only one PASID.
+ * Devices have a limited number (~10's to 1000's) of hardware workqueues.
+   The device driver manages allocating hardware workqueues.
+ * A single mmap() maps a single hardware workqueue as a "portal" and
+   each portal maps down to a single workqueue.
+ * For each device with which a process interacts, there must be
+   one or more mmap()'d portals.
+ * Many threads within a process can share a single portal to access
+   a single device.
+ * Multiple processes can separately mmap() the same portal, in
+   which case they still share one device hardware workqueue.
+ * The single process-wide PASID is used by all threads to interact
+   with all devices.  There is not, for instance, a PASID for each
+   thread or each thread<->device pair.
+
+FAQ
+===
+
+* What is SVA/SVM?
+
+Shared Virtual Addressing (SVA) permits I/O hardware and the processor to
+work in the same address space, i.e., to share it. Some call it Shared
+Virtual Memory (SVM), but Linux community wanted to avoid confusing it with
+POSIX Shared Memory and Secure Virtual Machines which were terms already in
+circulation.
+
+* What is a PASID?
+
+A Process Address Space ID (PASID) is a PCIe-defined Transaction Layer Packet
+(TLP) prefix. A PASID is a 20-bit number allocated and managed by the OS.
+PASID is included in all transactions between the platform and the device.
+
+* How are shared workqueues different?
+
+Traditionally, in order for userspace applications to interact with hardware,
+there is a separate hardware instance required per process. For example,
+consider doorbells as a mechanism of informing hardware about work to process.
+Each doorbell is required to be spaced 4k (or page-size) apart for process
+isolation. This requires hardware to provision that space and reserve it in
+MMIO. This doesn't scale as the number of threads becomes quite large. The
+hardware also manages the queue depth for Shared Work Queues (SWQ), and
+consumers don't need to track queue depth. If there is no space to accept
+a command, the device will return an error indicating retry.
+
+A user should check Deferrable Memory Write (DMWr) capability on the device
+and only submits ENQCMD when the device supports it. In the new DMWr PCIe
+terminology, devices need to support DMWr completer capability. In addition,
+it requires all switch ports to support DMWr routing and must be enabled by
+the PCIe subsystem, much like how PCIe atomic operations are managed for
+instance.
+
+SWQ allows hardware to provision just a single address in the device. When
+used with ENQCMD to submit work, the device can distinguish the process
+submitting the work since it will include the PASID assigned to that
+process. This helps the device scale to a large number of processes.
+
+* Is this the same as a user space device driver?
+
+Communicating with the device via the shared workqueue is much simpler
+than a full blown user space driver. The kernel driver does all the
+initialization of the hardware. User space only needs to worry about
+submitting work and processing completions.
+
+* Is this the same as SR-IOV?
+
+Single Root I/O Virtualization (SR-IOV) focuses on providing independent
+hardware interfaces for virtualizing hardware. Hence, it's required to be
+almost fully functional interface to software supporting the traditional
+BARs, space for interrupts via MSI-X, its own register layout.
+Virtual Functions (VFs) are assisted by the Physical Function (PF)
+driver.
+
+Scalable I/O Virtualization builds on the PASID concept to create device
+instances for virtualization. SIOV requires host software to assist in
+creating virtual devices; each virtual device is represented by a PASID
+along with the bus/device/function of the device.  This allows device
+hardware to optimize device resource creation and can grow dynamically on
+demand. SR-IOV creation and management is very static in nature. Consult
+references below for more details.
+
+* Why not just create a virtual function for each app?
+
+Creating PCIe SR-IOV type Virtual Functions (VF) is expensive. VFs require
+duplicated hardware for PCI config space and interrupts such as MSI-X.
+Resources such as interrupts have to be hard partitioned between VFs at
+creation time, and cannot scale dynamically on demand. The VFs are not
+completely independent from the Physical Function (PF). Most VFs require
+some communication and assistance from the PF driver. SIOV, in contrast,
+creates a software-defined device where all the configuration and control
+aspects are mediated via the slow path. The work submission and completion
+happen without any mediation.
+
+* Does this support virtualization?
+
+ENQCMD can be used from within a guest VM. In these cases, the VMM helps
+with setting up a translation table to translate from Guest PASID to Host
+PASID. Please consult the ENQCMD instruction set reference for more
+details.
+
+* Does memory need to be pinned?
+
+When devices support SVA along with platform hardware such as IOMMU
+supporting such devices, there is no need to pin memory for DMA purposes.
+Devices that support SVA also support other PCIe features that remove the
+pinning requirement for memory.
+
+Device TLB support - Device requests the IOMMU to lookup an address before
+use via Address Translation Service (ATS) requests.  If the mapping exists
+but there is no page allocated by the OS, IOMMU hardware returns that no
+mapping exists.
+
+Device requests the virtual address to be mapped via Page Request
+Interface (PRI). Once the OS has successfully completed the mapping, it
+returns the response back to the device. The device requests again for
+a translation and continues.
+
+IOMMU works with the OS in managing consistency of page-tables with the
+device. When removing pages, it interacts with the device to remove any
+device TLB entry that might have been cached before removing the mappings from
+the OS.
+
+References
+==========
+
+VT-D:
+https://01.org/blogs/ashokraj/2018/recent-enhancements-intel-virtualization-technology-directed-i/o-intel-vt-d
+
+SIOV:
+https://01.org/blogs/2019/assignable-interfaces-intel-scalable-i/o-virtualization-linux
+
+ENQCMD in ISE:
+https://software.intel.com/sites/default/files/managed/c5/15/architecture-instruction-set-extensions-programming-reference.pdf
+
+DSA spec:
+https://software.intel.com/sites/default/files/341204-intel-data-streaming-accelerator-spec.pdf

From ff4f82816dff28ffaaff96d1409bb3811d345514 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 15 Sep 2020 09:30:08 -0700
Subject: [PATCH 124/262] x86/cpufeatures: Enumerate ENQCMD and ENQCMDS
 instructions

Work submission instruction comes in two flavors. ENQCMD can be called
both in ring 3 and ring 0 and always uses the contents of a PASID MSR
when shipping the command to the device. ENQCMDS allows a kernel driver
to submit commands on behalf of a user process. The driver supplies the
PASID value in ENQCMDS. There isn't any usage of ENQCMD in the kernel as
of now.

The CPU feature flag is shown as "enqcmd" in /proc/cpuinfo.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1600187413-163670-5-git-send-email-fenghua.yu@intel.com
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/kernel/cpu/cpuid-deps.c   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366c0..fea10d04d05f57 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -353,6 +353,7 @@
 #define X86_FEATURE_CLDEMOTE		(16*32+25) /* CLDEMOTE instruction */
 #define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
 #define X86_FEATURE_MOVDIR64B		(16*32+28) /* MOVDIR64B instruction */
+#define X86_FEATURE_ENQCMD		(16*32+29) /* ENQCMD and ENQCMDS instructions */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 3cbe24ca80abd9..3a02707c1f4d0b 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -69,6 +69,7 @@ static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_CQM_MBM_TOTAL,		X86_FEATURE_CQM_LLC   },
 	{ X86_FEATURE_CQM_MBM_LOCAL,		X86_FEATURE_CQM_LLC   },
 	{ X86_FEATURE_AVX512_BF16,		X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_ENQCMD,			X86_FEATURE_XSAVES    },
 	{}
 };
 

From b454feb9abc1a9ee876fb84bfea0fc8d726f5bc4 Mon Sep 17 00:00:00 2001
From: Yu-cheng Yu <yu-cheng.yu@intel.com>
Date: Tue, 15 Sep 2020 09:30:09 -0700
Subject: [PATCH 125/262] x86/fpu/xstate: Add supervisor PASID state for ENQCMD

The ENQCMD instruction reads a PASID from the IA32_PASID MSR. The
MSR is stored in the task's supervisor XSAVE* PASID state and is
context-switched by XSAVES/XRSTORS.

 [ bp: Add (in-)definite articles and massage. ]

Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1600187413-163670-6-git-send-email-fenghua.yu@intel.com
---
 arch/x86/include/asm/fpu/types.h  | 11 ++++++++++-
 arch/x86/include/asm/fpu/xstate.h |  2 +-
 arch/x86/kernel/fpu/xstate.c      |  6 +++++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index c87364ea644656..f5a38a5f3ae11e 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -114,7 +114,7 @@ enum xfeature {
 	XFEATURE_Hi16_ZMM,
 	XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
 	XFEATURE_PKRU,
-	XFEATURE_RSRVD_COMP_10,
+	XFEATURE_PASID,
 	XFEATURE_RSRVD_COMP_11,
 	XFEATURE_RSRVD_COMP_12,
 	XFEATURE_RSRVD_COMP_13,
@@ -134,6 +134,7 @@ enum xfeature {
 #define XFEATURE_MASK_Hi16_ZMM		(1 << XFEATURE_Hi16_ZMM)
 #define XFEATURE_MASK_PT		(1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
 #define XFEATURE_MASK_PKRU		(1 << XFEATURE_PKRU)
+#define XFEATURE_MASK_PASID		(1 << XFEATURE_PASID)
 #define XFEATURE_MASK_LBR		(1 << XFEATURE_LBR)
 
 #define XFEATURE_MASK_FPSSE		(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
@@ -256,6 +257,14 @@ struct arch_lbr_state {
 	struct lbr_entry		entries[];
 } __packed;
 
+/*
+ * State component 10 is supervisor state used for context-switching the
+ * PASID state.
+ */
+struct ia32_pasid_state {
+	u64 pasid;
+} __packed;
+
 struct xstate_header {
 	u64				xfeatures;
 	u64				xcomp_bv;
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 14ab815132d4d2..47a92232d59531 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -35,7 +35,7 @@
 				      XFEATURE_MASK_BNDCSR)
 
 /* All currently supported supervisor features */
-#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0)
+#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID)
 
 /*
  * A supervisor state component may not always contain valuable information,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 038e19c0019ed9..67f1a03b9b2356 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -37,6 +37,7 @@ static const char *xfeature_names[] =
 	"AVX-512 ZMM_Hi256"		,
 	"Processor Trace (unused)"	,
 	"Protection Keys User registers",
+	"PASID state",
 	"unknown xstate feature"	,
 };
 
@@ -51,6 +52,7 @@ static short xsave_cpuid_features[] __initdata = {
 	X86_FEATURE_AVX512F,
 	X86_FEATURE_INTEL_PT,
 	X86_FEATURE_PKU,
+	X86_FEATURE_ENQCMD,
 };
 
 /*
@@ -318,6 +320,7 @@ static void __init print_xstate_features(void)
 	print_xstate_feature(XFEATURE_MASK_ZMM_Hi256);
 	print_xstate_feature(XFEATURE_MASK_Hi16_ZMM);
 	print_xstate_feature(XFEATURE_MASK_PKRU);
+	print_xstate_feature(XFEATURE_MASK_PASID);
 }
 
 /*
@@ -592,6 +595,7 @@ static void check_xstate_against_struct(int nr)
 	XCHECK_SZ(sz, nr, XFEATURE_ZMM_Hi256, struct avx_512_zmm_uppers_state);
 	XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM,  struct avx_512_hi16_state);
 	XCHECK_SZ(sz, nr, XFEATURE_PKRU,      struct pkru_state);
+	XCHECK_SZ(sz, nr, XFEATURE_PASID,     struct ia32_pasid_state);
 
 	/*
 	 * Make *SURE* to add any feature numbers in below if
@@ -601,7 +605,7 @@ static void check_xstate_against_struct(int nr)
 	if ((nr < XFEATURE_YMM) ||
 	    (nr >= XFEATURE_MAX) ||
 	    (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) ||
-	    ((nr >= XFEATURE_RSRVD_COMP_10) && (nr <= XFEATURE_LBR))) {
+	    ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_LBR))) {
 		WARN_ONCE(1, "no structure for xstate: %d\n", nr);
 		XSTATE_WARN_ON(1);
 	}

From f0f2f9feb4ee6f28729e5388da3c03ce1dac077a Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 15 Sep 2020 09:30:10 -0700
Subject: [PATCH 126/262] x86/msr-index: Define an IA32_PASID MSR

The IA32_PASID MSR (0xd93) contains the Process Address Space Identifier
(PASID), a 20-bit value. Bit 31 must be set to indicate the value
programmed in the MSR is valid. Hardware uses the PASID to identify a
process address space and direct responses to the right address space.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1600187413-163670-7-git-send-email-fenghua.yu@intel.com
---
 arch/x86/include/asm/msr-index.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 2859ee4f39a83f..aaddc6a9e2374f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -257,6 +257,9 @@
 #define MSR_IA32_LASTINTFROMIP		0x000001dd
 #define MSR_IA32_LASTINTTOIP		0x000001de
 
+#define MSR_IA32_PASID			0x00000d93
+#define MSR_IA32_PASID_VALID		BIT_ULL(31)
+
 /* DEBUGCTLMSR bits (others vary by model): */
 #define DEBUGCTLMSR_LBR			(1UL <<  0) /* last branch recording */
 #define DEBUGCTLMSR_BTF_SHIFT		1

From 52ad9bc64c74167466e70e0df4b99ee5ccef0078 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 15 Sep 2020 09:30:11 -0700
Subject: [PATCH 127/262] mm: Add a pasid member to struct mm_struct

A PASID is shared by all threads in a process. So the logical place to
keep track of it is in the mm_struct. Both ARM and x86 would use this
PASID.

 [ bp: Massage commit message. ]

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1600187413-163670-8-git-send-email-fenghua.yu@intel.com
---
 include/linux/mm_types.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 496c3ff97cce7a..1ff0615ef19f6e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -542,6 +542,10 @@ struct mm_struct {
 		atomic_long_t hugetlb_usage;
 #endif
 		struct work_struct async_put_work;
+
+#ifdef CONFIG_IOMMU_SUPPORT
+		u32 pasid;
+#endif
 	} __randomize_layout;
 
 	/*

From 1478b99a76534b6c244cfe24fa616280a9441118 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 15 Sep 2020 09:30:12 -0700
Subject: [PATCH 128/262] x86/cpufeatures: Mark ENQCMD as disabled when
 configured out

Currently, the ENQCMD feature depends on CONFIG_IOMMU_SUPPORT. Add
X86_FEATURE_ENQCMD to the disabled features mask so that it gets
disabled when the IOMMU config option above is not enabled.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1600187413-163670-9-git-send-email-fenghua.yu@intel.com
---
 arch/x86/include/asm/disabled-features.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 4ea8584682f998..5861d34f977182 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -56,6 +56,12 @@
 # define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
 #endif
 
+#ifdef CONFIG_IOMMU_SUPPORT
+# define DISABLE_ENQCMD	0
+#else
+# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -75,7 +81,8 @@
 #define DISABLED_MASK13	0
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
-#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
+#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
+			 DISABLE_ENQCMD)
 #define DISABLED_MASK17	0
 #define DISABLED_MASK18	0
 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)

From 20f0afd1fb3d7d44f4a3db5a4b6e904410862140 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Tue, 15 Sep 2020 09:30:13 -0700
Subject: [PATCH 129/262] x86/mmu: Allocate/free a PASID

A PASID is allocated for an "mm" the first time any thread binds to an
SVA-capable device and is freed from the "mm" when the SVA is unbound
by the last thread. It's possible for the "mm" to have different PASID
values in different binding/unbinding SVA cycles.

The mm's PASID (non-zero for valid PASID or 0 for invalid PASID) is
propagated to a per-thread PASID MSR for all threads within the mm
through IPI, context switch, or inherited. This is done to ensure that a
running thread has the right PASID in the MSR matching the mm's PASID.

 [ bp: s/SVM/SVA/g; massage. ]

Suggested-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/1600187413-163670-10-git-send-email-fenghua.yu@intel.com
---
 arch/x86/include/asm/fpu/api.h      | 12 ++++++
 arch/x86/include/asm/fpu/internal.h |  7 ++++
 arch/x86/kernel/fpu/xstate.c        | 57 +++++++++++++++++++++++++++++
 drivers/iommu/intel/svm.c           | 28 +++++++++++++-
 4 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b774c52e5411fc..dcd9503b109838 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -62,4 +62,16 @@ extern void switch_fpu_return(void);
  */
 extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
 
+/*
+ * Tasks that are not using SVA have mm->pasid set to zero to note that they
+ * will not have the valid bit set in MSR_IA32_PASID while they are running.
+ */
+#define PASID_DISABLED	0
+
+#ifdef CONFIG_IOMMU_SUPPORT
+/* Update current's PASID MSR/state by mm's PASID. */
+void update_pasid(void);
+#else
+static inline void update_pasid(void) { }
+#endif
 #endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 0a460f2a3f9041..341d00eba3f843 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -583,6 +583,13 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
 			pkru_val = pk->pkru;
 	}
 	__write_pkru(pkru_val);
+
+	/*
+	 * Expensive PASID MSR write will be avoided in update_pasid() because
+	 * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated
+	 * unless it's different from mm->pasid to reduce overhead.
+	 */
+	update_pasid();
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 67f1a03b9b2356..5d8047441a0aaf 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1402,3 +1402,60 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
 	return 0;
 }
 #endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
+#ifdef CONFIG_IOMMU_SUPPORT
+void update_pasid(void)
+{
+	u64 pasid_state;
+	u32 pasid;
+
+	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+		return;
+
+	if (!current->mm)
+		return;
+
+	pasid = READ_ONCE(current->mm->pasid);
+	/* Set the valid bit in the PASID MSR/state only for valid pasid. */
+	pasid_state = pasid == PASID_DISABLED ?
+		      pasid : pasid | MSR_IA32_PASID_VALID;
+
+	/*
+	 * No need to hold fregs_lock() since the task's fpstate won't
+	 * be changed by others (e.g. ptrace) while the task is being
+	 * switched to or is in IPI.
+	 */
+	if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+		/* The MSR is active and can be directly updated. */
+		wrmsrl(MSR_IA32_PASID, pasid_state);
+	} else {
+		struct fpu *fpu = &current->thread.fpu;
+		struct ia32_pasid_state *ppasid_state;
+		struct xregs_state *xsave;
+
+		/*
+		 * The CPU's xstate registers are not currently active. Just
+		 * update the PASID state in the memory buffer here. The
+		 * PASID MSR will be loaded when returning to user mode.
+		 */
+		xsave = &fpu->state.xsave;
+		xsave->header.xfeatures |= XFEATURE_MASK_PASID;
+		ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
+		/*
+		 * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state
+		 * won't be NULL and no need to check its value.
+		 *
+		 * Only update the task's PASID state when it's different
+		 * from the mm's pasid.
+		 */
+		if (ppasid_state->pasid != pasid_state) {
+			/*
+			 * Invalid fpregs so that state restoring will pick up
+			 * the PASID state.
+			 */
+			__fpu_invalidate_fpregs_state(fpu);
+			ppasid_state->pasid = pasid_state;
+		}
+	}
+}
+#endif /* CONFIG_IOMMU_SUPPORT */
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index fc90a079e22846..60ffe083b6d634 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -19,6 +19,7 @@
 #include <linux/mm_types.h>
 #include <linux/ioasid.h>
 #include <asm/page.h>
+#include <asm/fpu/api.h>
 
 #include "pasid.h"
 
@@ -444,6 +445,24 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
 	return ret;
 }
 
+static void _load_pasid(void *unused)
+{
+	update_pasid();
+}
+
+static void load_pasid(struct mm_struct *mm, u32 pasid)
+{
+	mutex_lock(&mm->context.lock);
+
+	/* Synchronize with READ_ONCE in update_pasid(). */
+	smp_store_release(&mm->pasid, pasid);
+
+	/* Update PASID MSR on all CPUs running the mm's tasks. */
+	on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true);
+
+	mutex_unlock(&mm->context.lock);
+}
+
 /* Caller must hold pasid_mutex, mm reference */
 static int
 intel_svm_bind_mm(struct device *dev, unsigned int flags,
@@ -591,6 +610,10 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
 		}
 
 		list_add_tail(&svm->list, &global_svm_list);
+		if (mm) {
+			/* The newly allocated pasid is loaded to the mm. */
+			load_pasid(mm, svm->pasid);
+		}
 	} else {
 		/*
 		 * Binding a new device with existing PASID, need to setup
@@ -654,8 +677,11 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
 
 			if (list_empty(&svm->devs)) {
 				ioasid_free(svm->pasid);
-				if (svm->mm)
+				if (svm->mm) {
 					mmu_notifier_unregister(&svm->notifier, svm->mm);
+					/* Clear mm's pasid. */
+					load_pasid(svm->mm, PASID_DISABLED);
+				}
 				list_del(&svm->list);
 				/* We mandate that no page faults may be outstanding
 				 * for the PASID when intel_svm_unbind_mm() is called.

From 07def58717dab4ff0055c61c316df65d59c53c83 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Mon, 14 Sep 2020 14:53:58 +0800
Subject: [PATCH 130/262] EDAC/aspeed: Use module_platform_driver() to simplify

Use module_platform_driver() which makes the code simpler by eliminating
boilerplate code.

Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Link: https://lkml.kernel.org/r/20200914065358.3726216-1-liushixin2@huawei.com
---
 drivers/edac/aspeed_edac.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c
index fbec28dc661d79..fde809efc52051 100644
--- a/drivers/edac/aspeed_edac.c
+++ b/drivers/edac/aspeed_edac.c
@@ -388,23 +388,7 @@ static struct platform_driver aspeed_driver = {
 	.probe		= aspeed_probe,
 	.remove		= aspeed_remove
 };
-
-
-static int __init aspeed_init(void)
-{
-	return platform_driver_register(&aspeed_driver);
-}
-
-
-static void __exit aspeed_exit(void)
-{
-	platform_driver_unregister(&aspeed_driver);
-}
-
-
-module_init(aspeed_init);
-module_exit(aspeed_exit);
-
+module_platform_driver(aspeed_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Stefan Schaeckeler <sschaeck@cisco.com>");

From e6bbde8b2b4f5aeb129c5a6cb99f0b773c871b8c Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Mon, 14 Sep 2020 10:48:54 +0800
Subject: [PATCH 131/262] EDAC/mc_sysfs: Add missing newlines when printing
 {max,dimm}_location

Reading those sysfs entries gives:

  [root@localhost /]# cat /sys/devices/system/edac/mc/mc0/max_location
  memory 3 [root@localhost /]# cat /sys/devices/system/edac/mc/mc0/dimm0/dimm_location
  memory 0 [root@localhost /]#

Add newlines after the value it prints for better readability.

  [ bp: Make len a signed int and change the check to catch wraparound.
    Increment the pointer p only when the length check passes. Use
    scnprintf(). ]

Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/1600051734-8993-1-git-send-email-wangxiongfeng2@huawei.com
---
 drivers/edac/edac_mc_sysfs.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 4e6aca595133ac..2f9f1e74bb35e7 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -474,8 +474,12 @@ static ssize_t dimmdev_location_show(struct device *dev,
 				     struct device_attribute *mattr, char *data)
 {
 	struct dimm_info *dimm = to_dimm(dev);
+	ssize_t count;
 
-	return edac_dimm_info_location(dimm, data, PAGE_SIZE);
+	count = edac_dimm_info_location(dimm, data, PAGE_SIZE);
+	count += scnprintf(data + count, PAGE_SIZE - count, "\n");
+
+	return count;
 }
 
 static ssize_t dimmdev_label_show(struct device *dev,
@@ -813,15 +817,23 @@ static ssize_t mci_max_location_show(struct device *dev,
 				     char *data)
 {
 	struct mem_ctl_info *mci = to_mci(dev);
-	int i;
+	int len = PAGE_SIZE;
 	char *p = data;
+	int i, n;
 
 	for (i = 0; i < mci->n_layers; i++) {
-		p += sprintf(p, "%s %d ",
-			     edac_layer_name[mci->layers[i].type],
-			     mci->layers[i].size - 1);
+		n = scnprintf(p, len, "%s %d ",
+			      edac_layer_name[mci->layers[i].type],
+			      mci->layers[i].size - 1);
+		len -= n;
+		if (len <= 0)
+			goto out;
+
+		p += n;
 	}
 
+	p += scnprintf(p, len, "\n");
+out:
 	return p - data;
 }
 

From 5866e9205b47a983a77ebc8654949f696342f2ab Mon Sep 17 00:00:00 2001
From: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Date: Thu, 17 Sep 2020 21:20:36 +0000
Subject: [PATCH 132/262] x86/cpu: Add hardware-enforced cache coherency as a
 CPUID feature

In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a system,
it is not required for software to flush the page from all CPU caches in the
system prior to changing the value of the C-bit for a page. This hardware-
enforced cache coherency is indicated by EAX[10] in CPUID leaf 0x8000001f.

 [ bp: Use one of the free slots in word 3. ]

Suggested-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200917212038.5090-2-krish.sadhukhan@oracle.com
---
 arch/x86/include/asm/cpufeatures.h | 2 +-
 arch/x86/kernel/cpu/scattered.c    | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 83fc9d38eb1fca..50b2a8d85ef037 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,7 @@
 #define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
-/* free					( 3*32+17) */
+#define X86_FEATURE_SME_COHERENT	( 3*32+17) /* "" AMD hardware-enforced cache coherency */
 #define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 62b137c3c97a20..3221b71a0df836 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -41,6 +41,7 @@ static const struct cpuid_bit cpuid_bits[] = {
 	{ X86_FEATURE_MBA,		CPUID_EBX,  6, 0x80000008, 0 },
 	{ X86_FEATURE_SME,		CPUID_EAX,  0, 0x8000001f, 0 },
 	{ X86_FEATURE_SEV,		CPUID_EAX,  1, 0x8000001f, 0 },
+	{ X86_FEATURE_SME_COHERENT,	CPUID_EAX, 10, 0x8000001f, 0 },
 	{ 0, 0, 0, 0, 0 }
 };
 

From 75d1cc0e05af579301ce4e49cf6399be4b4e6e76 Mon Sep 17 00:00:00 2001
From: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Date: Thu, 17 Sep 2020 21:20:37 +0000
Subject: [PATCH 133/262] x86/mm/pat: Don't flush cache if hardware enforces
 cache coherency across encryption domnains

In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page is enforced. In such a
system, it is not required for software to flush the page from all CPU
caches in the system prior to changing the value of the C-bit for the
page. So check that bit before flushing the cache.

 [ bp: Massage commit message. ]

Suggested-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200917212038.5090-3-krish.sadhukhan@oracle.com
---
 arch/x86/mm/pat/set_memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index d1b2a889f035d8..40baa90e74f4ca 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1999,7 +1999,7 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
 	/*
 	 * Before changing the encryption attribute, we need to flush caches.
 	 */
-	cpa_flush(&cpa, 1);
+	cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
 

From 5fd39dc220279108131edbc85832a5fef821a826 Mon Sep 17 00:00:00 2001
From: Clint Sbisa <csbisa@amazon.com>
Date: Fri, 18 Sep 2020 03:33:12 +0000
Subject: [PATCH 134/262] arm64: Enable PCI write-combine resources under sysfs

This change exposes write-combine mappings under sysfs for
prefetchable PCI resources on arm64.

Originally, the usage of "write combine" here was driven by the x86
definition of write combine. This definition is specific to x86 and
does not generalize to other architectures. However, the usage of WC
has mutated to "write combine" semantics, which is implemented
differently on each arch.

Generally, prefetchable BARs are accepted to allow speculative
accesses, write combining, and re-ordering-- from the PCI perspective,
this means there are no read side effects. (This contradicts the PCI
spec which allows prefetchable BARs to have read side effects, but
this definition is ill-advised as it is impossible to meet.) On x86,
prefetchable BARs are mapped as WC as originally defined (with some
conditionals on arch features). On arm64, WC is taken to mean normal
non-cacheable memory.

In practice, write combine semantics are used to minimize write
operations. A common usage of this is minimizing PCI TLPs which can
significantly improve performance with PCI devices. In order to
provide the same benefits to userspace, we need to allow userspace to
map prefetchable BARs with write combine semantics. The resourceX_wc
mapping is used today by userspace programs and libraries.

While this model is flawed as "write combine" is very ill-defined, it
is already used by multiple non-x86 archs to expose write combine
semantics to user space. We enable this on arm64 to give userspace on
arm64 an equivalent mechanism for utilizing write combining with PCI
devices.

Signed-off-by: Clint Sbisa <csbisa@amazon.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20200918033312.ddfpibgfylfjpex2@amazon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pci.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
index 70b323cf83000d..b33ca260e3c9df 100644
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -17,6 +17,7 @@
 #define pcibios_assign_all_busses() \
 	(pci_has_flag(PCI_REASSIGN_ALL_BUS))
 
+#define arch_can_pci_mmap_wc() 1
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE	1
 
 extern int isa_dma_bridge_buggy;

From e74e1d55728509b352e4eec4283dd5b2781b2070 Mon Sep 17 00:00:00 2001
From: Boyan Karatotev <boyan.karatotev@arm.com>
Date: Fri, 18 Sep 2020 11:47:12 +0100
Subject: [PATCH 135/262] kselftests/arm64: add a basic Pointer Authentication
 test

PAuth signs and verifies return addresses on the stack. It does so by
inserting a Pointer Authentication code (PAC) into some of the unused top
bits of an address. This is achieved by adding paciasp/autiasp instructions
at the beginning and end of a function.

This feature is partially backwards compatible with earlier versions of the
ARM architecture. To coerce the compiler into emitting fully backwards
compatible code the main file is compiled to target an earlier ARM version.
This allows the tests to check for the feature and print meaningful error
messages instead of crashing.

Add a test to verify that corrupting the return address results in a
SIGSEGV on return.

Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
Reviewed-by: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Reviewed-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20200918104715.182310-2-boian4o1@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/Makefile        |  2 +-
 .../testing/selftests/arm64/pauth/.gitignore  |  1 +
 tools/testing/selftests/arm64/pauth/Makefile  | 32 ++++++++++++++
 tools/testing/selftests/arm64/pauth/helper.h  |  9 ++++
 tools/testing/selftests/arm64/pauth/pac.c     | 44 +++++++++++++++++++
 .../selftests/arm64/pauth/pac_corruptor.S     | 19 ++++++++
 6 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/arm64/pauth/.gitignore
 create mode 100644 tools/testing/selftests/arm64/pauth/Makefile
 create mode 100644 tools/testing/selftests/arm64/pauth/helper.h
 create mode 100644 tools/testing/selftests/arm64/pauth/pac.c
 create mode 100644 tools/testing/selftests/arm64/pauth/pac_corruptor.S

diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 93b567d23c8b2e..525506fd97b914 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
 ARCH ?= $(shell uname -m 2>/dev/null || echo not)
 
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal
+ARM64_SUBTARGETS ?= tags signal pauth
 else
 ARM64_SUBTARGETS :=
 endif
diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore
new file mode 100644
index 00000000000000..b557c916720a2f
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/.gitignore
@@ -0,0 +1 @@
+pac
diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile
new file mode 100644
index 00000000000000..01d35aaa610a8f
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+CFLAGS += -mbranch-protection=pac-ret
+# check if the compiler supports ARMv8.3 and branch protection with PAuth
+pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(pauth_cc_support),1)
+TEST_GEN_PROGS := pac
+TEST_GEN_FILES := pac_corruptor.o
+endif
+
+include ../../lib.mk
+
+ifeq ($(pauth_cc_support),1)
+# pac* and aut* instructions are not available on architectures berfore
+# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly
+$(OUTPUT)/pac_corruptor.o: pac_corruptor.S
+	$(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or
+# greater, gcc emits pac* instructions which are not in HINT NOP space,
+# preventing the tests from occurring at all. Compile for ARMv8.2 so tests can
+# run on earlier targets and print a meaningful error messages
+$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o
+	$(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+endif
diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h
new file mode 100644
index 00000000000000..3e0a2a404bf431
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _HELPER_H_
+#define _HELPER_H_
+
+void pac_corruptor(void);
+
+#endif
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
new file mode 100644
index 00000000000000..0293310ba70ac6
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <sys/auxv.h>
+#include <signal.h>
+#include <setjmp.h>
+
+#include "../../kselftest_harness.h"
+#include "helper.h"
+
+#define ASSERT_PAUTH_ENABLED() \
+do { \
+	unsigned long hwcaps = getauxval(AT_HWCAP); \
+	/* data key instructions are not in NOP space. This prevents a SIGILL */ \
+	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
+} while (0)
+
+sigjmp_buf jmpbuf;
+void pac_signal_handler(int signum, siginfo_t *si, void *uc)
+{
+	if (signum == SIGSEGV || signum == SIGILL)
+		siglongjmp(jmpbuf, 1);
+}
+
+/* check that a corrupted PAC results in SIGSEGV or SIGILL */
+TEST(corrupt_pac)
+{
+	struct sigaction sa;
+
+	ASSERT_PAUTH_ENABLED();
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		sa.sa_sigaction = pac_signal_handler;
+		sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
+		sigemptyset(&sa.sa_mask);
+
+		sigaction(SIGSEGV, &sa, NULL);
+		sigaction(SIGILL, &sa, NULL);
+
+		pac_corruptor();
+		ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur");
+	}
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
new file mode 100644
index 00000000000000..aa6588050752bc
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+.global pac_corruptor
+
+.text
+/*
+ * Corrupting a single bit of the PAC ensures the authentication will fail.  It
+ * also guarantees no possible collision. TCR_EL1.TBI0 is set by default so no
+ * top byte PAC is tested
+ */
+ pac_corruptor:
+	paciasp
+
+	/* corrupt the top bit of the PAC */
+	eor lr, lr, #1 << 53
+
+	autiasp
+	ret

From 766d95b1ed93ebdd07ac87490e60e442342f5dc4 Mon Sep 17 00:00:00 2001
From: Boyan Karatotev <boyan.karatotev@arm.com>
Date: Fri, 18 Sep 2020 11:47:13 +0100
Subject: [PATCH 136/262] kselftests/arm64: add nop checks for PAuth tests

PAuth adds sign/verify controls to enable and disable groups of
instructions in hardware for compatibility with libraries that do not
implement PAuth. The kernel always enables them if it detects PAuth.

Add a test that checks that each group of instructions is enabled, if the
kernel reports PAuth as detected.

Note: For groups, for the purpose of this patch, we intend instructions
that use a certain key.

Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
Reviewed-by: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Reviewed-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20200918104715.182310-3-boian4o1@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../testing/selftests/arm64/pauth/.gitignore  |  1 +
 tools/testing/selftests/arm64/pauth/Makefile  |  7 ++-
 tools/testing/selftests/arm64/pauth/helper.c  | 39 ++++++++++++++
 tools/testing/selftests/arm64/pauth/helper.h  |  9 ++++
 tools/testing/selftests/arm64/pauth/pac.c     | 51 +++++++++++++++++++
 5 files changed, 105 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/arm64/pauth/helper.c

diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore
index b557c916720a2f..155137d9272298 100644
--- a/tools/testing/selftests/arm64/pauth/.gitignore
+++ b/tools/testing/selftests/arm64/pauth/.gitignore
@@ -1 +1,2 @@
+exec_target
 pac
diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile
index 01d35aaa610a8f..5c0dd129562fe3 100644
--- a/tools/testing/selftests/arm64/pauth/Makefile
+++ b/tools/testing/selftests/arm64/pauth/Makefile
@@ -12,7 +12,7 @@ pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/nu
 
 ifeq ($(pauth_cc_support),1)
 TEST_GEN_PROGS := pac
-TEST_GEN_FILES := pac_corruptor.o
+TEST_GEN_FILES := pac_corruptor.o helper.o
 endif
 
 include ../../lib.mk
@@ -23,10 +23,13 @@ ifeq ($(pauth_cc_support),1)
 $(OUTPUT)/pac_corruptor.o: pac_corruptor.S
 	$(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
 
+$(OUTPUT)/helper.o: helper.c
+	$(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
 # when -mbranch-protection is enabled and the target architecture is ARMv8.3 or
 # greater, gcc emits pac* instructions which are not in HINT NOP space,
 # preventing the tests from occurring at all. Compile for ARMv8.2 so tests can
 # run on earlier targets and print a meaningful error messages
-$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o
+$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o
 	$(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
 endif
diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c
new file mode 100644
index 00000000000000..2c201e7d0d5044
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include "helper.h"
+
+size_t keyia_sign(size_t ptr)
+{
+	asm volatile("paciza %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keyib_sign(size_t ptr)
+{
+	asm volatile("pacizb %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keyda_sign(size_t ptr)
+{
+	asm volatile("pacdza %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keydb_sign(size_t ptr)
+{
+	asm volatile("pacdzb %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keyg_sign(size_t ptr)
+{
+	/* output is encoded in the upper 32 bits */
+	size_t dest = 0;
+	size_t modifier = 0;
+
+	asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier));
+
+	return dest;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h
index 3e0a2a404bf431..35c4f3357ae3e0 100644
--- a/tools/testing/selftests/arm64/pauth/helper.h
+++ b/tools/testing/selftests/arm64/pauth/helper.h
@@ -4,6 +4,15 @@
 #ifndef _HELPER_H_
 #define _HELPER_H_
 
+#include <stdlib.h>
+
 void pac_corruptor(void);
 
+/* PAuth sign a value with key ia and modifier value 0 */
+size_t keyia_sign(size_t val);
+size_t keyib_sign(size_t val);
+size_t keyda_sign(size_t val);
+size_t keydb_sign(size_t val);
+size_t keyg_sign(size_t val);
+
 #endif
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
index 0293310ba70ac6..bd3d4c0eca9d9f 100644
--- a/tools/testing/selftests/arm64/pauth/pac.c
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -8,12 +8,25 @@
 #include "../../kselftest_harness.h"
 #include "helper.h"
 
+#define PAC_COLLISION_ATTEMPTS 10
+/*
+ * The kernel sets TBID by default. So bits 55 and above should remain
+ * untouched no matter what.
+ * The VA space size is 48 bits. Bigger is opt-in.
+ */
+#define PAC_MASK (~0xff80ffffffffffff)
 #define ASSERT_PAUTH_ENABLED() \
 do { \
 	unsigned long hwcaps = getauxval(AT_HWCAP); \
 	/* data key instructions are not in NOP space. This prevents a SIGILL */ \
 	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
 } while (0)
+#define ASSERT_GENERIC_PAUTH_ENABLED() \
+do { \
+	unsigned long hwcaps = getauxval(AT_HWCAP); \
+	/* generic key instructions are not in NOP space. This prevents a SIGILL */ \
+	ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
+} while (0)
 
 sigjmp_buf jmpbuf;
 void pac_signal_handler(int signum, siginfo_t *si, void *uc)
@@ -41,4 +54,42 @@ TEST(corrupt_pac)
 	}
 }
 
+/*
+ * There are no separate pac* and aut* controls so checking only the pac*
+ * instructions is sufficient
+ */
+TEST(pac_instructions_not_nop)
+{
+	size_t keyia = 0;
+	size_t keyib = 0;
+	size_t keyda = 0;
+	size_t keydb = 0;
+
+	ASSERT_PAUTH_ENABLED();
+
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+		keyia |= keyia_sign(i) & PAC_MASK;
+		keyib |= keyib_sign(i) & PAC_MASK;
+		keyda |= keyda_sign(i) & PAC_MASK;
+		keydb |= keydb_sign(i) & PAC_MASK;
+	}
+
+	ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing");
+	ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing");
+	ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing");
+	ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing");
+}
+
+TEST(pac_instructions_not_nop_generic)
+{
+	size_t keyg = 0;
+
+	ASSERT_GENERIC_PAUTH_ENABLED();
+
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++)
+		keyg |= keyg_sign(i) & PAC_MASK;
+
+	ASSERT_NE(0, keyg)  TH_LOG("keyg instructions did nothing");
+}
+
 TEST_HARNESS_MAIN

From 806a15b2545e6b5949f14ea8401ce295bd38a018 Mon Sep 17 00:00:00 2001
From: Boyan Karatotev <boyan.karatotev@arm.com>
Date: Fri, 18 Sep 2020 11:47:14 +0100
Subject: [PATCH 137/262] kselftests/arm64: add PAuth test for whether exec()
 changes keys

Kernel documentation states that it will change PAuth keys on exec() calls.

Verify that all keys are correctly switched to new ones.

Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
Reviewed-by: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Reviewed-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20200918104715.182310-4-boian4o1@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/pauth/Makefile  |   4 +
 .../selftests/arm64/pauth/exec_target.c       |  34 ++++
 tools/testing/selftests/arm64/pauth/helper.h  |  10 ++
 tools/testing/selftests/arm64/pauth/pac.c     | 150 ++++++++++++++++++
 4 files changed, 198 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/pauth/exec_target.c

diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile
index 5c0dd129562fe3..72e290b0b10c1e 100644
--- a/tools/testing/selftests/arm64/pauth/Makefile
+++ b/tools/testing/selftests/arm64/pauth/Makefile
@@ -13,6 +13,7 @@ pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/nu
 ifeq ($(pauth_cc_support),1)
 TEST_GEN_PROGS := pac
 TEST_GEN_FILES := pac_corruptor.o helper.o
+TEST_GEN_PROGS_EXTENDED := exec_target
 endif
 
 include ../../lib.mk
@@ -30,6 +31,9 @@ $(OUTPUT)/helper.o: helper.c
 # greater, gcc emits pac* instructions which are not in HINT NOP space,
 # preventing the tests from occurring at all. Compile for ARMv8.2 so tests can
 # run on earlier targets and print a meaningful error messages
+$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o
+	$(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+
 $(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o
 	$(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
 endif
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
new file mode 100644
index 00000000000000..4435600ca400dd
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+
+#include "helper.h"
+
+int main(void)
+{
+	struct signatures signed_vals;
+	unsigned long hwcaps;
+	size_t val;
+
+	fread(&val, sizeof(size_t), 1, stdin);
+
+	/* don't try to execute illegal (unimplemented) instructions) caller
+	 * should have checked this and keep worker simple
+	 */
+	hwcaps = getauxval(AT_HWCAP);
+
+	if (hwcaps & HWCAP_PACA) {
+		signed_vals.keyia = keyia_sign(val);
+		signed_vals.keyib = keyib_sign(val);
+		signed_vals.keyda = keyda_sign(val);
+		signed_vals.keydb = keydb_sign(val);
+	}
+	signed_vals.keyg = (hwcaps & HWCAP_PACG) ?  keyg_sign(val) : 0;
+
+	fwrite(&signed_vals, sizeof(struct signatures), 1, stdout);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h
index 35c4f3357ae3e0..652496c7b411cf 100644
--- a/tools/testing/selftests/arm64/pauth/helper.h
+++ b/tools/testing/selftests/arm64/pauth/helper.h
@@ -6,6 +6,16 @@
 
 #include <stdlib.h>
 
+#define NKEYS 5
+
+struct signatures {
+	size_t keyia;
+	size_t keyib;
+	size_t keyda;
+	size_t keydb;
+	size_t keyg;
+};
+
 void pac_corruptor(void);
 
 /* PAuth sign a value with key ia and modifier value 0 */
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
index bd3d4c0eca9d9f..b363ad6a0b50aa 100644
--- a/tools/testing/selftests/arm64/pauth/pac.c
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -2,6 +2,8 @@
 // Copyright (C) 2020 ARM Limited
 
 #include <sys/auxv.h>
+#include <sys/types.h>
+#include <sys/wait.h>
 #include <signal.h>
 #include <setjmp.h>
 
@@ -28,6 +30,117 @@ do { \
 	ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
 } while (0)
 
+void sign_specific(struct signatures *sign, size_t val)
+{
+	sign->keyia = keyia_sign(val);
+	sign->keyib = keyib_sign(val);
+	sign->keyda = keyda_sign(val);
+	sign->keydb = keydb_sign(val);
+}
+
+void sign_all(struct signatures *sign, size_t val)
+{
+	sign->keyia = keyia_sign(val);
+	sign->keyib = keyib_sign(val);
+	sign->keyda = keyda_sign(val);
+	sign->keydb = keydb_sign(val);
+	sign->keyg  = keyg_sign(val);
+}
+
+int n_same(struct signatures *old, struct signatures *new, int nkeys)
+{
+	int res = 0;
+
+	res += old->keyia == new->keyia;
+	res += old->keyib == new->keyib;
+	res += old->keyda == new->keyda;
+	res += old->keydb == new->keydb;
+	if (nkeys == NKEYS)
+		res += old->keyg == new->keyg;
+
+	return res;
+}
+
+int exec_sign_all(struct signatures *signed_vals, size_t val)
+{
+	int new_stdin[2];
+	int new_stdout[2];
+	int status;
+	ssize_t ret;
+	pid_t pid;
+
+	ret = pipe(new_stdin);
+	if (ret == -1) {
+		perror("pipe returned error");
+		return -1;
+	}
+
+	ret = pipe(new_stdout);
+	if (ret == -1) {
+		perror("pipe returned error");
+		return -1;
+	}
+
+	pid = fork();
+	// child
+	if (pid == 0) {
+		dup2(new_stdin[0], STDIN_FILENO);
+		if (ret == -1) {
+			perror("dup2 returned error");
+			exit(1);
+		}
+
+		dup2(new_stdout[1], STDOUT_FILENO);
+		if (ret == -1) {
+			perror("dup2 returned error");
+			exit(1);
+		}
+
+		close(new_stdin[0]);
+		close(new_stdin[1]);
+		close(new_stdout[0]);
+		close(new_stdout[1]);
+
+		ret = execl("exec_target", "exec_target", (char *)NULL);
+		if (ret == -1) {
+			perror("exec returned error");
+			exit(1);
+		}
+	}
+
+	close(new_stdin[0]);
+	close(new_stdout[1]);
+
+	ret = write(new_stdin[1], &val, sizeof(size_t));
+	if (ret == -1) {
+		perror("write returned error");
+		return -1;
+	}
+
+	/*
+	 * wait for the worker to finish, so that read() reads all data
+	 * will also context switch with worker so that this function can be used
+	 * for context switch tests
+	 */
+	waitpid(pid, &status, 0);
+	if (WIFEXITED(status) == 0) {
+		fprintf(stderr, "worker exited unexpectedly\n");
+		return -1;
+	}
+	if (WEXITSTATUS(status) != 0) {
+		fprintf(stderr, "worker exited with error\n");
+		return -1;
+	}
+
+	ret = read(new_stdout[0], signed_vals, sizeof(struct signatures));
+	if (ret == -1) {
+		perror("read returned error");
+		return -1;
+	}
+
+	return 0;
+}
+
 sigjmp_buf jmpbuf;
 void pac_signal_handler(int signum, siginfo_t *si, void *uc)
 {
@@ -92,4 +205,41 @@ TEST(pac_instructions_not_nop_generic)
 	ASSERT_NE(0, keyg)  TH_LOG("keyg instructions did nothing");
 }
 
+/*
+ * fork() does not change keys. Only exec() does so call a worker program.
+ * Its only job is to sign a value and report back the resutls
+ */
+TEST(exec_changed_keys)
+{
+	struct signatures new_keys;
+	struct signatures old_keys;
+	int ret;
+	int same = 10;
+	int nkeys = NKEYS;
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+
+	/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+	if (!(hwcaps & HWCAP_PACG)) {
+		TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+		nkeys = NKEYS - 1;
+	}
+
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+		ret = exec_sign_all(&new_keys, i);
+		ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+		if (nkeys == NKEYS)
+			sign_all(&old_keys, i);
+		else
+			sign_specific(&old_keys, i);
+
+		ret = n_same(&old_keys, &new_keys, nkeys);
+		if (ret < same)
+			same = ret;
+	}
+
+	ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same);
+}
+
 TEST_HARNESS_MAIN

From d21435e9670b11a02bcb1b5683dddd50da61966d Mon Sep 17 00:00:00 2001
From: Boyan Karatotev <boyan.karatotev@arm.com>
Date: Fri, 18 Sep 2020 11:47:15 +0100
Subject: [PATCH 138/262] kselftests/arm64: add PAuth tests for single threaded
 consistency and differently initialized keys

PAuth adds 5 different keys that can be used to sign addresses.

Add a test that verifies that the kernel initializes them to different
values and preserves them across context switches.

Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
Reviewed-by: Vincenzo Frascino <Vincenzo.Frascino@arm.com>
Reviewed-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20200918104715.182310-5-boian4o1@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/pauth/pac.c | 123 ++++++++++++++++++++++
 1 file changed, 123 insertions(+)

diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
index b363ad6a0b50aa..592fe538506e33 100644
--- a/tools/testing/selftests/arm64/pauth/pac.c
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -1,11 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (C) 2020 ARM Limited
 
+#define _GNU_SOURCE
+
 #include <sys/auxv.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <signal.h>
 #include <setjmp.h>
+#include <sched.h>
 
 #include "../../kselftest_harness.h"
 #include "helper.h"
@@ -17,6 +20,7 @@
  * The VA space size is 48 bits. Bigger is opt-in.
  */
 #define PAC_MASK (~0xff80ffffffffffff)
+#define ARBITRARY_VALUE (0x1234)
 #define ASSERT_PAUTH_ENABLED() \
 do { \
 	unsigned long hwcaps = getauxval(AT_HWCAP); \
@@ -61,13 +65,37 @@ int n_same(struct signatures *old, struct signatures *new, int nkeys)
 	return res;
 }
 
+int n_same_single_set(struct signatures *sign, int nkeys)
+{
+	size_t vals[nkeys];
+	int same = 0;
+
+	vals[0] = sign->keyia & PAC_MASK;
+	vals[1] = sign->keyib & PAC_MASK;
+	vals[2] = sign->keyda & PAC_MASK;
+	vals[3] = sign->keydb & PAC_MASK;
+
+	if (nkeys >= 4)
+		vals[4] = sign->keyg & PAC_MASK;
+
+	for (int i = 0; i < nkeys - 1; i++) {
+		for (int j = i + 1; j < nkeys; j++) {
+			if (vals[i] == vals[j])
+				same += 1;
+		}
+	}
+	return same;
+}
+
 int exec_sign_all(struct signatures *signed_vals, size_t val)
 {
 	int new_stdin[2];
 	int new_stdout[2];
 	int status;
+	int i;
 	ssize_t ret;
 	pid_t pid;
+	cpu_set_t mask;
 
 	ret = pipe(new_stdin);
 	if (ret == -1) {
@@ -81,6 +109,20 @@ int exec_sign_all(struct signatures *signed_vals, size_t val)
 		return -1;
 	}
 
+	/*
+	 * pin this process and all its children to a single CPU, so it can also
+	 * guarantee a context switch with its child
+	 */
+	sched_getaffinity(0, sizeof(mask), &mask);
+
+	for (i = 0; i < sizeof(cpu_set_t); i++)
+		if (CPU_ISSET(i, &mask))
+			break;
+
+	CPU_ZERO(&mask);
+	CPU_SET(i, &mask);
+	sched_setaffinity(0, sizeof(mask), &mask);
+
 	pid = fork();
 	// child
 	if (pid == 0) {
@@ -205,6 +247,44 @@ TEST(pac_instructions_not_nop_generic)
 	ASSERT_NE(0, keyg)  TH_LOG("keyg instructions did nothing");
 }
 
+TEST(single_thread_different_keys)
+{
+	int same = 10;
+	int nkeys = NKEYS;
+	int tmp;
+	struct signatures signed_vals;
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+
+	/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+	if (!(hwcaps & HWCAP_PACG)) {
+		TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+		nkeys = NKEYS - 1;
+	}
+
+	/*
+	 * In Linux the PAC field can be up to 7 bits wide. Even if keys are
+	 * different, there is about 5% chance for PACs to collide with
+	 * different addresses. This chance rapidly increases with fewer bits
+	 * allocated for the PAC (e.g. wider address). A comparison of the keys
+	 * directly will be more reliable.
+	 * All signed values need to be different at least once out of n
+	 * attempts to be certain that the keys are different
+	 */
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+		if (nkeys == NKEYS)
+			sign_all(&signed_vals, i);
+		else
+			sign_specific(&signed_vals, i);
+
+		tmp = n_same_single_set(&signed_vals, nkeys);
+		if (tmp < same)
+			same = tmp;
+	}
+
+	ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same);
+}
+
 /*
  * fork() does not change keys. Only exec() does so call a worker program.
  * Its only job is to sign a value and report back the resutls
@@ -242,4 +322,47 @@ TEST(exec_changed_keys)
 	ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same);
 }
 
+TEST(context_switch_keep_keys)
+{
+	int ret;
+	struct signatures trash;
+	struct signatures before;
+	struct signatures after;
+
+	ASSERT_PAUTH_ENABLED();
+
+	sign_specific(&before, ARBITRARY_VALUE);
+
+	/* will context switch with a process with different keys at least once */
+	ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+	ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+	sign_specific(&after, ARBITRARY_VALUE);
+
+	ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching");
+	ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching");
+	ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching");
+	ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching");
+}
+
+TEST(context_switch_keep_keys_generic)
+{
+	int ret;
+	struct signatures trash;
+	size_t before;
+	size_t after;
+
+	ASSERT_GENERIC_PAUTH_ENABLED();
+
+	before = keyg_sign(ARBITRARY_VALUE);
+
+	/* will context switch with a process with different keys at least once */
+	ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+	ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+	after = keyg_sign(ARBITRARY_VALUE);
+
+	ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching");
+}
+
 TEST_HARNESS_MAIN

From ca765153eb90577e5fda281485048427b80a9a77 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 19 Aug 2020 12:48:32 +0100
Subject: [PATCH 139/262] selftests: arm64: Test case for enumeration of SVE
 vector lengths

Add a test case that verifies that we can enumerate the SVE vector lengths
on systems where we detect SVE, and that those SVE vector lengths are
valid. This program was written by Dave Martin and adapted to kselftest by
me.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20200819114837.51466-2-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../selftests/arm64/fp/sve-probe-vls.c        | 58 +++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/fp/sve-probe-vls.c

diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
new file mode 100644
index 00000000000000..b29cbc642c5747
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/sigcontext.h>
+
+#include "../../kselftest.h"
+
+int main(int argc, char **argv)
+{
+	unsigned int vq;
+	int vl;
+	static unsigned int vqs[SVE_VQ_MAX];
+	unsigned int nvqs = 0;
+
+	ksft_print_header();
+	ksft_set_plan(2);
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+		ksft_exit_skip("SVE not available");
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(PR_SVE_SET_VL, vq * 16);
+		if (vl == -1)
+			ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+					   strerror(errno), errno);
+
+		vl &= PR_SVE_VL_LEN_MASK;
+
+		if (!sve_vl_valid(vl))
+			ksft_exit_fail_msg("VL %d invalid\n", vl);
+		vq = sve_vq_from_vl(vl);
+
+		if (!(nvqs < SVE_VQ_MAX))
+			ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n",
+					   nvqs);
+		vqs[nvqs++] = vq;
+	}
+	ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs);
+	ksft_test_result_pass("All vector lengths valid\n");
+
+	/* Print out the vector lengths in ascending order: */
+	while (nvqs--)
+		ksft_print_msg("%u\n", 16 * vqs[nvqs]);
+
+	ksft_exit_pass();
+}

From 0dca276ac4d20d4071b3d3095b1ad33269ea5272 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 19 Aug 2020 12:48:33 +0100
Subject: [PATCH 140/262] selftests: arm64: Add test for the SVE ptrace
 interface

Add a test case that does some basic verification of the SVE ptrace
interface, forking off a child with known values in the registers and
then using ptrace to inspect and manipulate the SVE registers of the
child, including in FPSIMD mode to account for sharing between the SVE
and FPSIMD registers.

This program was written by Dave Martin and modified for kselftest by
me.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20200819114837.51466-3-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../selftests/arm64/fp/sve-ptrace-asm.S       |  33 ++
 tools/testing/selftests/arm64/fp/sve-ptrace.c | 336 ++++++++++++++++++
 2 files changed, 369 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
 create mode 100644 tools/testing/selftests/arm64/fp/sve-ptrace.c

diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
new file mode 100644
index 00000000000000..3e81f9fab574b9
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+#include <asm/unistd.h>
+
+.arch_extension sve
+
+.globl sve_store_patterns
+
+sve_store_patterns:
+	mov	x1, x0
+
+	index	z0.b, #0, #1
+	str	q0, [x1]
+
+	mov	w8, #__NR_getpid
+	svc	#0
+	str	q0, [x1, #0x10]
+
+	mov	z1.d, z0.d
+	str	q0, [x1, #0x20]
+
+	mov	w8, #__NR_getpid
+	svc	#0
+	str	q0, [x1, #0x30]
+
+	mov	z1.d, z0.d
+	str	q0, [x1, #0x40]
+
+	ret
+
+.size	sve_store_patterns, . - sve_store_patterns
+.type	sve_store_patterns, @function
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
new file mode 100644
index 00000000000000..b2282be6f9384c
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_SVE
+#define NT_ARM_SVE 0x405
+#endif
+
+/* Number of registers filled in by sve_store_patterns */
+#define NR_VREGS 5
+
+void sve_store_patterns(__uint128_t v[NR_VREGS]);
+
+static void dump(const void *buf, size_t size)
+{
+	size_t i;
+	const unsigned char *p = buf;
+
+	for (i = 0; i < size; ++i)
+		printf(" %.2x", *p++);
+}
+
+static int check_vregs(const __uint128_t vregs[NR_VREGS])
+{
+	int i;
+	int ok = 1;
+
+	for (i = 0; i < NR_VREGS; ++i) {
+		printf("# v[%d]:", i);
+		dump(&vregs[i], sizeof vregs[i]);
+		putchar('\n');
+
+		if (vregs[i] != vregs[0])
+			ok = 0;
+	}
+
+	return ok;
+}
+
+static int do_child(void)
+{
+	if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+		ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+	if (raise(SIGSTOP))
+		ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+	return EXIT_SUCCESS;
+}
+
+static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
+{
+	struct user_sve_header *sve;
+	void *p;
+	size_t sz = sizeof *sve;
+	struct iovec iov;
+
+	while (1) {
+		if (*size < sz) {
+			p = realloc(*buf, sz);
+			if (!p) {
+				errno = ENOMEM;
+				goto error;
+			}
+
+			*buf = p;
+			*size = sz;
+		}
+
+		iov.iov_base = *buf;
+		iov.iov_len = sz;
+		if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov))
+			goto error;
+
+		sve = *buf;
+		if (sve->size <= sz)
+			break;
+
+		sz = sve->size;
+	}
+
+	return sve;
+
+error:
+	return NULL;
+}
+
+static int set_sve(pid_t pid, const struct user_sve_header *sve)
+{
+	struct iovec iov;
+
+	iov.iov_base = (void *)sve;
+	iov.iov_len = sve->size;
+	return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
+}
+
+static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num,
+			  unsigned int vlmax)
+{
+	unsigned int vq;
+	unsigned int i;
+
+	if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+		ksft_exit_fail_msg("Dumping non-SVE register\n");
+
+	if (vlmax > sve->vl)
+		vlmax = sve->vl;
+
+	vq = sve_vq_from_vl(sve->vl);
+	for (i = 0; i < num; ++i) {
+		printf("# z%u:", i);
+		dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+		     vlmax);
+		printf("%s\n", vlmax == sve->vl ? "" : " ...");
+	}
+}
+
+static int do_parent(pid_t child)
+{
+	int ret = EXIT_FAILURE;
+	pid_t pid;
+	int status;
+	siginfo_t si;
+	void *svebuf = NULL, *newsvebuf;
+	size_t svebufsz = 0, newsvebufsz;
+	struct user_sve_header *sve, *new_sve;
+	struct user_fpsimd_state *fpsimd;
+	unsigned int i, j;
+	unsigned char *p;
+	unsigned int vq;
+
+	/* Attach to the child */
+	while (1) {
+		int sig;
+
+		pid = wait(&status);
+		if (pid == -1) {
+			perror("wait");
+			goto error;
+		}
+
+		/*
+		 * This should never happen but it's hard to flag in
+		 * the framework.
+		 */
+		if (pid != child)
+			continue;
+
+		if (WIFEXITED(status) || WIFSIGNALED(status))
+			ksft_exit_fail_msg("Child died unexpectedly\n");
+
+		ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n",
+				 status);
+		if (!WIFSTOPPED(status))
+			goto error;
+
+		sig = WSTOPSIG(status);
+
+		if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+			if (errno == ESRCH)
+				goto disappeared;
+
+			if (errno == EINVAL) {
+				sig = 0; /* bust group-stop */
+				goto cont;
+			}
+
+			ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+					      strerror(errno));
+			goto error;
+		}
+
+		if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+		    si.si_pid == pid)
+			break;
+
+	cont:
+		if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+			if (errno == ESRCH)
+				goto disappeared;
+
+			ksft_test_result_fail("PTRACE_CONT: %s\n",
+					      strerror(errno));
+			goto error;
+		}
+	}
+
+	sve = get_sve(pid, &svebuf, &svebufsz);
+	if (!sve) {
+		int e = errno;
+
+		ksft_test_result_fail("get_sve: %s\n", strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	} else {
+		ksft_test_result_pass("get_sve\n");
+	}
+
+	ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
+			 "FPSIMD registers\n");
+	if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
+		goto error;
+
+	fpsimd = (struct user_fpsimd_state *)((char *)sve +
+					      SVE_PT_FPSIMD_OFFSET);
+	for (i = 0; i < 32; ++i) {
+		p = (unsigned char *)&fpsimd->vregs[i];
+
+		for (j = 0; j < sizeof fpsimd->vregs[i]; ++j)
+			p[j] = j;
+	}
+
+	if (set_sve(pid, sve)) {
+		int e = errno;
+
+		ksft_test_result_fail("set_sve(FPSIMD): %s\n",
+				      strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	}
+
+	vq = sve_vq_from_vl(sve->vl);
+
+	newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+	new_sve = newsvebuf = malloc(newsvebufsz);
+	if (!new_sve) {
+		errno = ENOMEM;
+		perror(NULL);
+		goto error;
+	}
+
+	*new_sve = *sve;
+	new_sve->flags &= ~SVE_PT_REGS_MASK;
+	new_sve->flags |= SVE_PT_REGS_SVE;
+	memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+	       0, SVE_PT_SVE_ZREG_SIZE(vq));
+	new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+	if (set_sve(pid, new_sve)) {
+		int e = errno;
+
+		ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	}
+
+	new_sve = get_sve(pid, &newsvebuf, &newsvebufsz);
+	if (!new_sve) {
+		int e = errno;
+
+		ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	}
+
+	ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE,
+			 "SVE registers\n");
+	if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+		goto error;
+
+	dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]);
+
+	p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+	for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) {
+		unsigned char expected = i;
+
+		if (__BYTE_ORDER == __BIG_ENDIAN)
+			expected = sizeof fpsimd->vregs[0] - 1 - expected;
+
+		ksft_test_result(p[i] == expected, "p[%d] == expected\n", i);
+		if (p[i] != expected)
+			goto error;
+	}
+
+	ret = EXIT_SUCCESS;
+
+error:
+	kill(child, SIGKILL);
+
+disappeared:
+	return ret;
+}
+
+int main(void)
+{
+	int ret = EXIT_SUCCESS;
+	__uint128_t v[NR_VREGS];
+	pid_t child;
+
+	ksft_print_header();
+	ksft_set_plan(20);
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+		ksft_exit_skip("SVE not available\n");
+
+	sve_store_patterns(v);
+
+	if (!check_vregs(v))
+		ksft_exit_fail_msg("Initial check_vregs() failed\n");
+
+	child = fork();
+	if (!child)
+		return do_child();
+
+	if (do_parent(child))
+		ret = EXIT_FAILURE;
+
+	ksft_print_cnts();
+
+	return 0;
+}

From 5e992c638ea55d928e43d3c1aab1bd8e13835e6e Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 19 Aug 2020 12:48:34 +0100
Subject: [PATCH 141/262] selftests: arm64: Add stress tests for FPSMID and SVE
 context switching

Add programs sve-test and fpsimd-test which spin reading and writing to
the SVE and FPSIMD registers, verifying the operations they perform. The
intended use is to leave them running to stress the context switch code's
handling of these registers which isn't compatible with what kselftest
does so they're not integrated into the framework but there's no other
obvious testsuite where they fit so let's store them here.

These tests were written by Dave Martin and lightly adapted by me.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20200819114837.51466-4-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../testing/selftests/arm64/fp/asm-offsets.h  |  11 +
 tools/testing/selftests/arm64/fp/assembler.h  |  57 ++
 .../testing/selftests/arm64/fp/fpsimd-test.S  | 482 +++++++++++++
 tools/testing/selftests/arm64/fp/sve-test.S   | 672 ++++++++++++++++++
 4 files changed, 1222 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/fp/asm-offsets.h
 create mode 100644 tools/testing/selftests/arm64/fp/assembler.h
 create mode 100644 tools/testing/selftests/arm64/fp/fpsimd-test.S
 create mode 100644 tools/testing/selftests/arm64/fp/sve-test.S

diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h
new file mode 100644
index 00000000000000..a180851496ecf2
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/asm-offsets.h
@@ -0,0 +1,11 @@
+#define sa_sz 32
+#define sa_flags 8
+#define sa_handler 0
+#define sa_mask_sz 8
+#define SIGUSR1 10
+#define SIGTERM 15
+#define SIGINT 2
+#define SIGABRT 6
+#define SA_NODEFER 1073741824
+#define SA_SIGINFO 4
+#define ucontext_regs 184
diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
new file mode 100644
index 00000000000000..8944f2189206fd
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/assembler.h
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+.macro __for from:req, to:req
+	.if (\from) == (\to)
+		_for__body %\from
+	.else
+		__for \from, %(\from) + ((\to) - (\from)) / 2
+		__for %(\from) + ((\to) - (\from)) / 2 + 1, \to
+	.endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+	.macro _for__body \var:req
+		.noaltmacro
+		\insn
+		.altmacro
+	.endm
+
+	.altmacro
+	__for \from, \to
+	.noaltmacro
+
+	.purgem _for__body
+.endm
+
+.macro function name
+	.macro endfunction
+		.type \name, @function
+		.purgem endfunction
+	.endm
+\name:
+.endm
+
+.macro define_accessor name, num, insn
+	.macro \name\()_entry n
+		\insn \n, 1
+		ret
+	.endm
+
+function \name
+	adr	x2, .L__accessor_tbl\@
+	add	x2, x2, x0, lsl #3
+	br	x2
+
+.L__accessor_tbl\@:
+	_for x, 0, (\num) - 1, \name\()_entry \x
+endfunction
+
+	.purgem \name\()_entry
+.endm
+
+#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
new file mode 100644
index 00000000000000..1c5556bdd11d16
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple FPSIMD context switch test
+// Repeatedly writes unique test patterns into each FPSIMD register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NVR	32
+#define MAXVL_B	(128 / 8)
+
+.macro _vldr Vn:req, Xt:req
+	ld1	{v\Vn\().2d}, [x\Xt]
+.endm
+
+.macro _vstr Vn:req, Xt:req
+	st1	{v\Vn\().2d}, [x\Xt]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// FPSIMD registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (getv,setp) or store to (setv,setp)
+// All clobber x0-x2
+define_accessor setv, NVR, _vldr
+define_accessor getv, NVR, _vstr
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+	str	x0, [sp, #-16]!
+
+	mov	x0, #1			// STDOUT_FILENO
+	mov	x1, sp
+	mov	x2, #1
+	mov	x8, #__NR_write
+	svc	#0
+
+	add	sp, sp, #16
+	ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+	mov	x1, x0
+
+	mov	x2, #0
+0:	ldrb	w3, [x0], #1
+	cbz	w3, 1f
+	add	x2, x2, #1
+	b	0b
+
+1:	mov	w0, #1			// STDOUT_FILENO
+	mov	x8, #__NR_write
+	svc	#0
+
+	ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+	.pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+	.popsection
+
+	ldr	x0, =.L__puts_literal\@
+	bl	puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+	mov	x1, sp
+	str	x30, [sp, #-32]!	// Result can't be > 20 digits
+
+	mov	x2, #0
+	strb	w2, [x1, #-1]!		// Write the NUL terminator
+
+	mov	x2, #10
+0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
+	msub	x0, x3, x2, x0
+	add	w0, w0, #'0'
+	strb	w0, [x1, #-1]!
+	mov	x0, x3
+	cbnz	x3, 0b
+
+	ldrb	w0, [x1]
+	cbnz	w0, 1f
+	mov	w0, #'0'		// Print "0" for 0, not ""
+	strb	w0, [x1, #-1]!
+
+1:	mov	x0, x1
+	bl	puts
+
+	ldr	x30, [sp], #32
+	ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+	mov	x5, x30
+
+	bl	putdec
+	mov	x0, #'\n'
+	bl	putc
+
+	ret	x5
+endfunction
+
+
+// Clobbers x0-x3,x8
+function puthexb
+	str	x30, [sp, #-0x10]!
+
+	mov	w3, w0
+	lsr	w0, w0, #4
+	bl	puthexnibble
+	mov	w0, w3
+
+	ldr	x30, [sp], #0x10
+	// fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+	and	w0, w0, #0xf
+	cmp	w0, #10
+	blo	1f
+	add	w0, w0, #'a' - ('9' + 1)
+1:	add	w0, w0, #'0'
+	b	putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+	str	x30, [sp, #-0x10]!
+
+	mov	x4, x0
+	mov	x5, x1
+
+0:	subs	x5, x5, #1
+	b.lo	1f
+	ldrb	w0, [x4], #1
+	bl	puthexb
+	b	0b
+
+1:	ldr	x30, [sp], #0x10
+	ret
+endfunction
+
+// Declare some storate space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+vref:
+	.space	MAXVL_B * NVR
+scratch:
+	.space	MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+	cmp	x2, #0
+	b.eq	1f
+0:	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+	subs	x2, x2, #1
+	b.ne	0b
+1:	ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid	(16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+function pattern
+	orr	w1, w0, w1, lsl #16
+	orr	w2, w1, w2, lsl #28
+
+	ldr	x0, =scratch
+	mov	w1, #MAXVL_B / 4
+
+0:	str	w2, [x0], #4
+	add	w2, w2, #(1 << 22)
+	subs	w1, w1, #1
+	bne	0b
+
+	ret
+endfunction
+
+// Get the address of shadow data for FPSIMD V-register V<xn>
+.macro _adrv xd, xn, nrtmp
+	ldr	\xd, =vref
+	mov	x\nrtmp, #16
+	madd	\xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a FPSIMD V-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_vreg
+	mov	x4, x30
+
+	mov	x6, x1
+	bl	pattern
+	_adrv	x0, x6, 2
+	mov	x5, x0
+	ldr	x1, =scratch
+	bl	memcpy
+
+	mov	x0, x6
+	mov	x1, x5
+	bl	setv
+
+	ret	x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+	mov	w2, #0xae
+	b	memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+	mov	w2, #0
+endfunction
+	// fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+	cmp	x1, #0
+	b.eq	1f
+
+0:	strb	w2, [x0], #1
+	subs	x1, x1, #1
+	b.ne	0b
+
+1:	ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+	cbz	x2, 1f
+
+	mov	x5, #0
+0:	ldrb	w3, [x0, x5]
+	ldrb	w4, [x1, x5]
+	add	x5, x5, #1
+	cmp	w3, w4
+	b.ne	barf
+	subs	x2, x2, #1
+	b.ne	0b
+
+1:	ret
+endfunction
+
+// Verify that a FPSIMD V-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x5.
+function check_vreg
+	mov	x3, x30
+
+	_adrv	x5, x0, 6
+	mov	x4, x0
+	ldr	x7, =scratch
+
+	mov	x0, x7
+	mov	x1, x6
+	bl	memfill_ae
+
+	mov	x0, x4
+	mov	x1, x7
+	bl	getv
+
+	mov	x0, x5
+	mov	x1, x7
+	mov	x2, x6
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+	// Increment the irritation signal count (x23):
+	ldr	x0, [x2, #ucontext_regs + 8 * 23]
+	add	x0, x0, #1
+	str	x0, [x2, #ucontext_regs + 8 * 23]
+
+	// Corrupt some random V-regs
+	adr	x0, .text + (irritator_handler - .text) / 16 * 16
+	movi	v0.8b, #7
+	movi	v9.16b, #9
+	movi	v31.8b, #31
+
+	ret
+endfunction
+
+function terminate_handler
+	mov	w21, w0
+	mov	x20, x2
+
+	puts	"Terminated by signal "
+	mov	w0, w21
+	bl	putdec
+	puts	", no error, iterations="
+	ldr	x0, [x20, #ucontext_regs + 8 * 22]
+	bl	putdec
+	puts	", signals="
+	ldr	x0, [x20, #ucontext_regs + 8 * 23]
+	bl	putdecn
+
+	mov	x0, #0
+	mov	x8, #__NR_exit
+	svc	#0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+	mov	w4, w0
+	mov	x5, x1
+	mov	w6, w2
+
+	add	x0, sp, #16
+	mov	x1, #sa_sz
+	bl	memclr
+
+	mov	w0, w4
+	add	x1, sp, #16
+	str	w6, [x1, #sa_flags]
+	str	x5, [x1, #sa_handler]
+	mov	x2, #0
+	mov	x3, #sa_mask_sz
+	mov	x8, #__NR_rt_sigaction
+	svc	#0
+
+	cbz	w0, 1f
+
+	puts	"sigaction failure\n"
+	b	.Labort
+
+1:	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+	ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+	// Sanity-check and report the vector length
+
+	mov	x19, #128
+	cmp	x19, #128
+	b.lo	1f
+	cmp	x19, #2048
+	b.hi	1f
+	tst	x19, #(8 - 1)
+	b.eq	2f
+
+1:	puts	"Bad vector length: "
+	mov	x0, x19
+	bl	putdecn
+	b	.Labort
+
+2:	puts	"Vector length:\t"
+	mov	x0, x19
+	bl	putdec
+	puts	" bits\n"
+
+	// Obtain our PID, to ensure test pattern uniqueness between processes
+
+	mov	x8, #__NR_getpid
+	svc	#0
+	mov	x20, x0
+
+	puts	"PID:\t"
+	mov	x0, x20
+	bl	putdecn
+
+	mov	x23, #0		// Irritation signal count
+
+	mov	w0, #SIGINT
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGTERM
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGUSR1
+	adr	x1, irritator_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
+	mov	x22, #0		// generation number, increments per iteration
+.Ltest_loop:
+
+	mov	x21, #0		// Set up V-regs & shadow with test pattern
+0:	mov	x0, x20
+	mov	x1, x21
+	and	x2, x22, #0xf
+	bl	setup_vreg
+	add	x21, x21, #1
+	cmp	x21, #NVR
+	b.lo	0b
+
+// Can't do this when SVE state is volatile across SVC:
+	mov	x8, #__NR_sched_yield	// Encourage preemption
+	svc	#0
+
+	mov	x21, #0
+0:	mov	x0, x21
+	bl	check_vreg
+	add	x21, x21, #1
+	cmp	x21, #NVR
+	b.lo	0b
+
+	add	x22, x22, #1
+	b	.Ltest_loop
+
+.Labort:
+	mov	x0, #0
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+endfunction
+
+function barf
+	mov	x10, x0	// expected data
+	mov	x11, x1	// actual data
+	mov	x12, x2	// data size
+
+	puts	"Mistatch: PID="
+	mov	x0, x20
+	bl	putdec
+	puts	", iteration="
+	mov	x0, x22
+	bl	putdec
+	puts	", reg="
+	mov	x0, x21
+	bl	putdecn
+	puts	"\tExpected ["
+	mov	x0, x10
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n\tGot      ["
+	mov	x0, x11
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n"
+
+	mov	x8, #__NR_exit
+	mov	x1, #1
+	svc	#0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
new file mode 100644
index 00000000000000..f95074c9b48b73
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple Scalable Vector Extension context switch test
+// Repeatedly writes unique test patterns into each SVE register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NZR	32
+#define NPR	16
+#define MAXVL_B	(2048 / 8)
+
+.arch_extension sve
+
+.macro _sve_ldr_v zt, xn
+	ldr	z\zt, [x\xn]
+.endm
+
+.macro _sve_str_v zt, xn
+	str	z\zt, [x\xn]
+.endm
+
+.macro _sve_ldr_p pt, xn
+	ldr	p\pt, [x\xn]
+.endm
+
+.macro _sve_str_p pt, xn
+	str	p\pt, [x\xn]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// SVE registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (getz,setp) or store to (setz,setp)
+// All clobber x0-x2
+define_accessor setz, NZR, _sve_ldr_v
+define_accessor getz, NZR, _sve_str_v
+define_accessor setp, NPR, _sve_ldr_p
+define_accessor getp, NPR, _sve_str_p
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+	str	x0, [sp, #-16]!
+
+	mov	x0, #1			// STDOUT_FILENO
+	mov	x1, sp
+	mov	x2, #1
+	mov	x8, #__NR_write
+	svc	#0
+
+	add	sp, sp, #16
+	ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+	mov	x1, x0
+
+	mov	x2, #0
+0:	ldrb	w3, [x0], #1
+	cbz	w3, 1f
+	add	x2, x2, #1
+	b	0b
+
+1:	mov	w0, #1			// STDOUT_FILENO
+	mov	x8, #__NR_write
+	svc	#0
+
+	ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+	.pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+	.popsection
+
+	ldr	x0, =.L__puts_literal\@
+	bl	puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+	mov	x1, sp
+	str	x30, [sp, #-32]!	// Result can't be > 20 digits
+
+	mov	x2, #0
+	strb	w2, [x1, #-1]!		// Write the NUL terminator
+
+	mov	x2, #10
+0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
+	msub	x0, x3, x2, x0
+	add	w0, w0, #'0'
+	strb	w0, [x1, #-1]!
+	mov	x0, x3
+	cbnz	x3, 0b
+
+	ldrb	w0, [x1]
+	cbnz	w0, 1f
+	mov	w0, #'0'		// Print "0" for 0, not ""
+	strb	w0, [x1, #-1]!
+
+1:	mov	x0, x1
+	bl	puts
+
+	ldr	x30, [sp], #32
+	ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+	mov	x5, x30
+
+	bl	putdec
+	mov	x0, #'\n'
+	bl	putc
+
+	ret	x5
+endfunction
+
+// Clobbers x0-x3,x8
+function puthexb
+	str	x30, [sp, #-0x10]!
+
+	mov	w3, w0
+	lsr	w0, w0, #4
+	bl	puthexnibble
+	mov	w0, w3
+
+	ldr	x30, [sp], #0x10
+	// fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+	and	w0, w0, #0xf
+	cmp	w0, #10
+	blo	1f
+	add	w0, w0, #'a' - ('9' + 1)
+1:	add	w0, w0, #'0'
+	b	putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+	str	x30, [sp, #-0x10]!
+
+	mov	x4, x0
+	mov	x5, x1
+
+0:	subs	x5, x5, #1
+	b.lo	1f
+	ldrb	w0, [x4], #1
+	bl	puthexb
+	b	0b
+
+1:	ldr	x30, [sp], #0x10
+	ret
+endfunction
+
+// Declare some storate space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+zref:
+	.space	MAXVL_B * NZR
+pref:
+	.space	MAXVL_B / 8 * NPR
+ffrref:
+	.space	MAXVL_B / 8
+scratch:
+	.space	MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+	cmp	x2, #0
+	b.eq	1f
+0:	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+	subs	x2, x2, #1
+	b.ne	0b
+1:	ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid	(16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+
+// These values are used to constuct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28	generation number (increments once per test_loop)
+// bits 27:22	32-bit lane index
+// bits 21:16	register number
+// bits 15: 0	pid
+
+function pattern
+	orr	w1, w0, w1, lsl #16
+	orr	w2, w1, w2, lsl #28
+
+	ldr	x0, =scratch
+	mov	w1, #MAXVL_B / 4
+
+0:	str	w2, [x0], #4
+	add	w2, w2, #(1 << 22)
+	subs	w1, w1, #1
+	bne	0b
+
+	ret
+endfunction
+
+// Get the address of shadow data for SVE Z-register Z<xn>
+.macro _adrz xd, xn, nrtmp
+	ldr	\xd, =zref
+	rdvl	x\nrtmp, #1
+	madd	\xd, x\nrtmp, \xn, \xd
+.endm
+
+// Get the address of shadow data for SVE P-register P<xn - NZR>
+.macro _adrp xd, xn, nrtmp
+	ldr	\xd, =pref
+	rdvl	x\nrtmp, #1
+	lsr	x\nrtmp, x\nrtmp, #3
+	sub	\xn, \xn, #NZR
+	madd	\xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a SVE Z-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_zreg
+	mov	x4, x30
+
+	mov	x6, x1
+	bl	pattern
+	_adrz	x0, x6, 2
+	mov	x5, x0
+	ldr	x1, =scratch
+	bl	memcpy
+
+	mov	x0, x6
+	mov	x1, x5
+	bl	setz
+
+	ret	x4
+endfunction
+
+// Set up test pattern in a SVE P-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_preg
+	mov	x4, x30
+
+	mov	x6, x1
+	bl	pattern
+	_adrp	x0, x6, 2
+	mov	x5, x0
+	ldr	x1, =scratch
+	bl	memcpy
+
+	mov	x0, x6
+	mov	x1, x5
+	bl	setp
+
+	ret	x4
+endfunction
+
+// Set up test pattern in the FFR
+// x0: pid
+// x2: generation
+// Beware: corrupts P0.
+function setup_ffr
+	mov	x4, x30
+
+	bl	pattern
+	ldr	x0, =ffrref
+	ldr	x1, =scratch
+	rdvl	x2, #1
+	lsr	x2, x2, #3
+	bl	memcpy
+
+	mov	x0, #0
+	ldr	x1, =ffrref
+	bl	setp
+
+	wrffr	p0.b
+
+	ret	x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+	mov	w2, #0xae
+	b	memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+	mov	w2, #0
+endfunction
+	// fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+	cmp	x1, #0
+	b.eq	1f
+
+0:	strb	w2, [x0], #1
+	subs	x1, x1, #1
+	b.ne	0b
+
+1:	ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+	cbz	x2, 2f
+
+	stp	x0, x1, [sp, #-0x20]!
+	str	x2, [sp, #0x10]
+
+	mov	x5, #0
+0:	ldrb	w3, [x0, x5]
+	ldrb	w4, [x1, x5]
+	add	x5, x5, #1
+	cmp	w3, w4
+	b.ne	1f
+	subs	x2, x2, #1
+	b.ne	0b
+
+1:	ldr	x2, [sp, #0x10]
+	ldp	x0, x1, [sp], #0x20
+	b.ne	barf
+
+2:	ret
+endfunction
+
+// Verify that a SVE Z-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_zreg
+	mov	x3, x30
+
+	_adrz	x5, x0, 6
+	mov	x4, x0
+	ldr	x7, =scratch
+
+	mov	x0, x7
+	mov	x1, x6
+	bl	memfill_ae
+
+	mov	x0, x4
+	mov	x1, x7
+	bl	getz
+
+	mov	x0, x5
+	mov	x1, x7
+	mov	x2, x6
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Verify that a SVE P-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_preg
+	mov	x3, x30
+
+	_adrp	x5, x0, 6
+	mov	x4, x0
+	ldr	x7, =scratch
+
+	mov	x0, x7
+	mov	x1, x6
+	bl	memfill_ae
+
+	mov	x0, x4
+	mov	x1, x7
+	bl	getp
+
+	mov	x0, x5
+	mov	x1, x7
+	mov	x2, x6
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Verify that the FFR matches its shadow in memory, else abort
+// Beware -- corrupts P0.
+// Clobbers x0-x5.
+function check_ffr
+	mov	x3, x30
+
+	ldr	x4, =scratch
+	rdvl	x5, #1
+	lsr	x5, x5, #3
+
+	mov	x0, x4
+	mov	x1, x5
+	bl	memfill_ae
+
+	rdffr	p0.b
+	mov	x0, #0
+	mov	x1, x4
+	bl	getp
+
+	ldr	x0, =ffrref
+	mov	x1, x4
+	mov	x2, x5
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+	// Increment the irritation signal count (x23):
+	ldr	x0, [x2, #ucontext_regs + 8 * 23]
+	add	x0, x0, #1
+	str	x0, [x2, #ucontext_regs + 8 * 23]
+
+	// Corrupt some random Z-regs
+	adr	x0, .text + (irritator_handler - .text) / 16 * 16
+	movi	v0.8b, #1
+	movi	v9.16b, #2
+	movi	v31.8b, #3
+	// And P0
+	rdffr	p0.b
+	// And FFR
+	wrffr	p15.b
+
+	ret
+endfunction
+
+function terminate_handler
+	mov	w21, w0
+	mov	x20, x2
+
+	puts	"Terminated by signal "
+	mov	w0, w21
+	bl	putdec
+	puts	", no error, iterations="
+	ldr	x0, [x20, #ucontext_regs + 8 * 22]
+	bl	putdec
+	puts	", signals="
+	ldr	x0, [x20, #ucontext_regs + 8 * 23]
+	bl	putdecn
+
+	mov	x0, #0
+	mov	x8, #__NR_exit
+	svc	#0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+	mov	w4, w0
+	mov	x5, x1
+	mov	w6, w2
+
+	add	x0, sp, #16
+	mov	x1, #sa_sz
+	bl	memclr
+
+	mov	w0, w4
+	add	x1, sp, #16
+	str	w6, [x1, #sa_flags]
+	str	x5, [x1, #sa_handler]
+	mov	x2, #0
+	mov	x3, #sa_mask_sz
+	mov	x8, #__NR_rt_sigaction
+	svc	#0
+
+	cbz	w0, 1f
+
+	puts	"sigaction failure\n"
+	b	.Labort
+
+1:	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+	ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+	// Sanity-check and report the vector length
+
+	rdvl	x19, #8
+	cmp	x19, #128
+	b.lo	1f
+	cmp	x19, #2048
+	b.hi	1f
+	tst	x19, #(8 - 1)
+	b.eq	2f
+
+1:	puts	"Bad vector length: "
+	mov	x0, x19
+	bl	putdecn
+	b	.Labort
+
+2:	puts	"Vector length:\t"
+	mov	x0, x19
+	bl	putdec
+	puts	" bits\n"
+
+	// Obtain our PID, to ensure test pattern uniqueness between processes
+
+	mov	x8, #__NR_getpid
+	svc	#0
+	mov	x20, x0
+
+	puts	"PID:\t"
+	mov	x0, x20
+	bl	putdecn
+
+	mov	x23, #0		// Irritation signal count
+
+	mov	w0, #SIGINT
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGTERM
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGUSR1
+	adr	x1, irritator_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
+	mov	x22, #0		// generation number, increments per iteration
+.Ltest_loop:
+	rdvl	x0, #8
+	cmp	x0, x19
+	b.ne	vl_barf
+
+	mov	x21, #0		// Set up Z-regs & shadow with test pattern
+0:	mov	x0, x20
+	mov	x1, x21
+	and	x2, x22, #0xf
+	bl	setup_zreg
+	add	x21, x21, #1
+	cmp	x21, #NZR
+	b.lo	0b
+
+	mov	x0, x20		// Set up FFR & shadow with test pattern
+	mov	x1, #NZR + NPR
+	and	x2, x22, #0xf
+	bl	setup_ffr
+
+0:	mov	x0, x20		// Set up P-regs & shadow with test pattern
+	mov	x1, x21
+	and	x2, x22, #0xf
+	bl	setup_preg
+	add	x21, x21, #1
+	cmp	x21, #NZR + NPR
+	b.lo	0b
+
+// Can't do this when SVE state is volatile across SVC:
+//	mov	x8, #__NR_sched_yield	// Encourage preemption
+//	svc	#0
+
+	mov	x21, #0
+0:	mov	x0, x21
+	bl	check_zreg
+	add	x21, x21, #1
+	cmp	x21, #NZR
+	b.lo	0b
+
+0:	mov	x0, x21
+	bl	check_preg
+	add	x21, x21, #1
+	cmp	x21, #NZR + NPR
+	b.lo	0b
+
+	bl	check_ffr
+
+	add	x22, x22, #1
+	b	.Ltest_loop
+
+.Labort:
+	mov	x0, #0
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+//	ldr	w0, =0xdeadc0de
+//	mov	w8, #__NR_exit
+//	svc	#0
+// end hack
+	mov	x10, x0	// expected data
+	mov	x11, x1	// actual data
+	mov	x12, x2	// data size
+
+	puts	"Mistatch: PID="
+	mov	x0, x20
+	bl	putdec
+	puts	", iteration="
+	mov	x0, x22
+	bl	putdec
+	puts	", reg="
+	mov	x0, x21
+	bl	putdecn
+	puts	"\tExpected ["
+	mov	x0, x10
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n\tGot      ["
+	mov	x0, x11
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n"
+
+	mov	x8, #__NR_getpid
+	svc	#0
+// fpsimd.c acitivty log dump hack
+//	ldr	w0, =0xdeadc0de
+//	mov	w8, #__NR_exit
+//	svc	#0
+// ^ end of hack
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+//	mov	x8, #__NR_exit
+//	mov	x1, #1
+//	svc	#0
+endfunction
+
+function vl_barf
+	mov	x10, x0
+
+	puts	"Bad active VL: "
+	mov	x0, x10
+	bl	putdecn
+
+	mov	x8, #__NR_exit
+	mov	x1, #1
+	svc	#0
+endfunction

From fc7e611f9f38de7166c5f8951df93f7351542448 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 19 Aug 2020 12:48:35 +0100
Subject: [PATCH 142/262] selftests: arm64: Add utility to set SVE vector
 lengths

vlset is a small utility for use in conjunction with tests like the sve-test
stress test which allows another executable to be invoked with a configured
SVE vector length.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20200819114837.51466-5-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/fp/vlset.c | 155 +++++++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/fp/vlset.c

diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c
new file mode 100644
index 00000000000000..308d27a6822698
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vlset.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2019 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+
+static int inherit = 0;
+static int no_inherit = 0;
+static int force = 0;
+static unsigned long vl;
+
+static const struct option options[] = {
+	{ "force",	no_argument, NULL, 'f' },
+	{ "inherit",	no_argument, NULL, 'i' },
+	{ "max",	no_argument, NULL, 'M' },
+	{ "no-inherit",	no_argument, &no_inherit, 1 },
+	{ "help",	no_argument, NULL, '?' },
+	{}
+};
+
+static char const *program_name;
+
+static int parse_options(int argc, char **argv)
+{
+	int c;
+	char *rest;
+
+	program_name = strrchr(argv[0], '/');
+	if (program_name)
+		++program_name;
+	else
+		program_name = argv[0];
+
+	while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1)
+		switch (c) {
+		case 'M':	vl = SVE_VL_MAX; break;
+		case 'f':	force = 1; break;
+		case 'i':	inherit = 1; break;
+		case 0:		break;
+		default:	goto error;
+		}
+
+	if (inherit && no_inherit)
+		goto error;
+
+	if (!vl) {
+		/* vector length */
+		if (optind >= argc)
+			goto error;
+
+		errno = 0;
+		vl = strtoul(argv[optind], &rest, 0);
+		if (*rest) {
+			vl = ULONG_MAX;
+			errno = EINVAL;
+		}
+		if (vl == ULONG_MAX && errno) {
+			fprintf(stderr, "%s: %s: %s\n",
+				program_name, argv[optind], strerror(errno));
+			goto error;
+		}
+
+		++optind;
+	}
+
+	/* command */
+	if (optind >= argc)
+		goto error;
+
+	return 0;
+
+error:
+	fprintf(stderr,
+		"Usage: %s [-f | --force] "
+		"[-i | --inherit | --no-inherit] "
+		"{-M | --max | <vector length>} "
+		"<command> [<arguments> ...]\n",
+		program_name);
+	return -1;
+}
+
+int main(int argc, char **argv)
+{
+	int ret = 126;	/* same as sh(1) command-not-executable error */
+	long flags;
+	char *path;
+	int t, e;
+
+	if (parse_options(argc, argv))
+		return 2;	/* same as sh(1) builtin incorrect-usage */
+
+	if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) {
+		fprintf(stderr, "%s: Invalid vector length %lu\n",
+			program_name, vl);
+		return 2;	/* same as sh(1) builtin incorrect-usage */
+	}
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+		fprintf(stderr, "%s: Scalable Vector Extension not present\n",
+			program_name);
+
+		if (!force)
+			goto error;
+
+		fputs("Going ahead anyway (--force):  "
+		      "This is a debug option.  Don't rely on it.\n",
+		      stderr);
+	}
+
+	flags = PR_SVE_SET_VL_ONEXEC;
+	if (inherit)
+		flags |= PR_SVE_VL_INHERIT;
+
+	t = prctl(PR_SVE_SET_VL, vl | flags);
+	if (t < 0) {
+		fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
+			program_name, strerror(errno));
+		goto error;
+	}
+
+	t = prctl(PR_SVE_GET_VL);
+	if (t == -1) {
+		fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
+			program_name, strerror(errno));
+		goto error;
+	}
+	flags = PR_SVE_VL_LEN_MASK;
+	flags = t & ~flags;
+
+	assert(optind < argc);
+	path = argv[optind];
+
+	execvp(path, &argv[optind]);
+	e = errno;
+	if (errno == ENOENT)
+		ret = 127;	/* same as sh(1) not-found error */
+	fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e));
+
+error:
+	return ret;		/* same as sh(1) not-executable error */
+}

From 25f47e3eb66e6ee31b3ed3f8c0b7bdce098106fe Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 19 Aug 2020 12:48:36 +0100
Subject: [PATCH 143/262] selftests: arm64: Add wrapper scripts for stress
 tests

Add wrapper scripts which invoke fpsimd-test and sve-test with several
copies per CPU such that the context switch code will be appropriately
exercised.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20200819114837.51466-6-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../testing/selftests/arm64/fp/fpsimd-stress  | 60 +++++++++++++++++++
 tools/testing/selftests/arm64/fp/sve-stress   | 59 ++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100755 tools/testing/selftests/arm64/fp/fpsimd-stress
 create mode 100755 tools/testing/selftests/arm64/fp/sve-stress

diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress
new file mode 100755
index 00000000000000..781b5b022eafa3
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-stress
@@ -0,0 +1,60 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+	trap - INT TERM CHLD
+	set +e
+
+	if [ -n "$pids" ]; then
+		kill $pids
+		wait $pids
+		pids=
+	fi
+
+	if [ -n "$logs" ]; then
+		cat $logs
+		rm $logs
+		logs=
+	fi
+}
+
+interrupt () {
+	cleanup
+	exit 0
+}
+
+child_died () {
+	cleanup
+	exit 1
+}
+
+trap interrupt INT TERM EXIT
+trap child_died CHLD
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+	log=`mktemp`
+	logs=$logs\ $log
+	./fpsimd-test >$log &
+	pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+	kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress
new file mode 100755
index 00000000000000..24dd0922cc02b4
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+	trap - INT TERM CHLD
+	set +e
+
+	if [ -n "$pids" ]; then
+		kill $pids
+		wait $pids
+		pids=
+	fi
+
+	if [ -n "$logs" ]; then
+		cat $logs
+		rm $logs
+		logs=
+	fi
+}
+
+interrupt () {
+	cleanup
+	exit 0
+}
+
+child_died () {
+	cleanup
+	exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+	log=`mktemp`
+	logs=$logs\ $log
+	./sve-test >$log &
+	pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+	kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1

From e093256d14fbf9e8aaf02ed0ac69087eef6792bd Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 19 Aug 2020 12:48:37 +0100
Subject: [PATCH 144/262] selftests: arm64: Add build and documentation for FP
 tests

Integrate the FP tests with the build system and add some documentation
for the ones run outside the kselftest infrastructure.  The content in
the README was largely written by Dave Martin with edits by me.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20200819114837.51466-7-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/Makefile      |   2 +-
 tools/testing/selftests/arm64/fp/.gitignore |   5 +
 tools/testing/selftests/arm64/fp/Makefile   |  17 ++++
 tools/testing/selftests/arm64/fp/README     | 100 ++++++++++++++++++++
 4 files changed, 123 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/arm64/fp/.gitignore
 create mode 100644 tools/testing/selftests/arm64/fp/Makefile
 create mode 100644 tools/testing/selftests/arm64/fp/README

diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 525506fd97b914..463d56278f8f9e 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
 ARCH ?= $(shell uname -m 2>/dev/null || echo not)
 
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal pauth
+ARM64_SUBTARGETS ?= tags signal pauth fp
 else
 ARM64_SUBTARGETS :=
 endif
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
new file mode 100644
index 00000000000000..d66f76d2a65025
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -0,0 +1,5 @@
+fpsimd-test
+sve-probe-vls
+sve-ptrace
+sve-test
+vlset
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
new file mode 100644
index 00000000000000..a57009d3a0dc20
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls
+TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset
+
+all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+
+fpsimd-test: fpsimd-test.o
+	$(CC) -nostdlib $^ -o $@
+sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
+sve-probe-vls: sve-probe-vls.o
+sve-test: sve-test.o
+	$(CC) -nostdlib $^ -o $@
+vlset: vlset.o
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README
new file mode 100644
index 00000000000000..03e3dad865d86b
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/README
@@ -0,0 +1,100 @@
+This directory contains a mix of tests integrated with kselftest and
+standalone stress tests.
+
+kselftest tests
+===============
+
+sve-probe-vls - Checks the SVE vector length enumeration interface
+sve-ptrace - Checks the SVE ptrace interface
+
+Running the non-kselftest tests
+===============================
+
+sve-stress performs an SVE context switch stress test, as described
+below.
+
+(The fpsimd-stress test works the same way; just substitute "fpsimd" for
+"sve" in the following commands.)
+
+
+The test runs until killed by the user.
+
+If no context switch error was detected, you will see output such as
+the following:
+
+$ ./sve-stress
+(wait for some time)
+^C
+Vector length:        512 bits
+PID:    1573
+Terminated by signal 15, no error, iterations=9467, signals=1014
+Vector length:  512 bits
+PID:    1575
+Terminated by signal 15, no error, iterations=9448, signals=1028
+Vector length:  512 bits
+PID:    1577
+Terminated by signal 15, no error, iterations=9436, signals=1039
+Vector length:  512 bits
+PID:    1579
+Terminated by signal 15, no error, iterations=9421, signals=1039
+Vector length:  512 bits
+PID:    1581
+Terminated by signal 15, no error, iterations=9403, signals=1039
+Vector length:  512 bits
+PID:    1583
+Terminated by signal 15, no error, iterations=9385, signals=1036
+Vector length:  512 bits
+PID:    1585
+Terminated by signal 15, no error, iterations=9376, signals=1039
+Vector length:  512 bits
+PID:    1587
+Terminated by signal 15, no error, iterations=9361, signals=1039
+Vector length:  512 bits
+PID:    1589
+Terminated by signal 15, no error, iterations=9350, signals=1039
+
+
+If an error was detected, details of the mismatch will be printed
+instead of "no error".
+
+Ideally, the test should be allowed to run for many minutes or hours
+to maximise test coverage.
+
+
+KVM stress testing
+==================
+
+To try to reproduce the bugs that we have been observing, sve-stress
+should be run in parallel in two KVM guests, while simultaneously
+running on the host.
+
+1) Start 2 guests, using the following command for each:
+
+$ lkvm run --console=virtio -pconsole=hvc0 --sve Image
+
+(Depending on the hardware GIC implementation, you may also need
+--irqchip=gicv3.  New kvmtool defaults to that if appropriate, but I
+can't remember whether my branch is new enough for that.  Try without
+the option first.)
+
+Kvmtool occupies the terminal until you kill it (Ctrl+A x),
+or until the guest terminates.  It is therefore recommended to run
+each instance in separate terminal (use screen or ssh etc.)  This
+allows multiple guests to be run in parallel while running other
+commands on the host.
+
+Within the guest, the host filesystem is accessible, mounted on /host.
+
+2) Run the sve-stress on *each* guest with the Vector-Length set to 32:
+guest$ ./vlset --inherit 32 ./sve-stress
+
+3) Run the sve-stress on the host with the maximum Vector-Length:
+host$ ./vlset --inherit --max ./sve-stress
+
+
+Again, the test should be allowed to run for many minutes or hours to
+maximise test coverage.
+
+If no error is detected, you will see output from each sve-stress
+instance similar to that illustrated above; otherwise details of the
+observed mismatches will be printed.

From e100777016fdf6ec3a9d7c1773b15a2b5eca6c55 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 14 Sep 2020 19:21:28 +0200
Subject: [PATCH 145/262] x86/mce: Annotate mce_rd/wrmsrl() with noinstr

They do get called from the #MC handler which is already marked
"noinstr".

Commit

  e2def7d49d08 ("x86/mce: Make mce_rdmsrl() panic on an inaccessible MSR")

already got rid of the instrumentation in the MSR accessors, fix the
annotation now too, in order to get rid of:

  vmlinux.o: warning: objtool: do_machine_check()+0x4a: call to mce_rdmsrl() leaves .noinstr.text section

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200915194020.28807-1-bp@alien8.de
---
 arch/x86/kernel/cpu/mce/core.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5b1d5f33d60bb7..11b6697be01087 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -392,16 +392,25 @@ __visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
 }
 
 /* MSR access wrappers used for error injection */
-static u64 mce_rdmsrl(u32 msr)
+static noinstr u64 mce_rdmsrl(u32 msr)
 {
 	DECLARE_ARGS(val, low, high);
 
 	if (__this_cpu_read(injectm.finished)) {
-		int offset = msr_to_offset(msr);
+		int offset;
+		u64 ret;
 
+		instrumentation_begin();
+
+		offset = msr_to_offset(msr);
 		if (offset < 0)
-			return 0;
-		return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
+			ret = 0;
+		else
+			ret = *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
+
+		instrumentation_end();
+
+		return ret;
 	}
 
 	/*
@@ -437,15 +446,21 @@ __visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
 	return true;
 }
 
-static void mce_wrmsrl(u32 msr, u64 v)
+static noinstr void mce_wrmsrl(u32 msr, u64 v)
 {
 	u32 low, high;
 
 	if (__this_cpu_read(injectm.finished)) {
-		int offset = msr_to_offset(msr);
+		int offset;
 
+		instrumentation_begin();
+
+		offset = msr_to_offset(msr);
 		if (offset >= 0)
 			*(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
+
+		instrumentation_end();
+
 		return;
 	}
 

From 264c03a245de7c5b1cc3836db45de6b991f877ca Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 14 Sep 2020 16:34:07 +0100
Subject: [PATCH 146/262] stacktrace: Remove reliable argument from
 arch_stack_walk() callback

Currently the callback passed to arch_stack_walk() has an argument called
reliable passed to it to indicate if the stack entry is reliable, a comment
says that this is used by some printk() consumers. However in the current
kernel none of the arch_stack_walk() implementations ever set this flag to
true and the only callback implementation we have is in the generic
stacktrace code which ignores the flag. It therefore appears that this
flag is redundant so we can simplify and clarify things by removing it.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Link: https://lore.kernel.org/r/20200914153409.25097-2-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/s390/kernel/stacktrace.c |  4 ++--
 arch/x86/kernel/stacktrace.c  | 10 +++++-----
 include/linux/stacktrace.h    |  5 +----
 kernel/stacktrace.c           |  8 +++-----
 4 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index fc5419ac64c8a5..7f1266c24f6bc1 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -19,7 +19,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 
 	unwind_for_each_frame(&state, task, regs, 0) {
 		addr = unwind_get_return_address(&state);
-		if (!addr || !consume_entry(cookie, addr, false))
+		if (!addr || !consume_entry(cookie, addr))
 			break;
 	}
 }
@@ -56,7 +56,7 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
 			return -EINVAL;
 #endif
 
-		if (!consume_entry(cookie, addr, false))
+		if (!consume_entry(cookie, addr))
 			return -EINVAL;
 	}
 
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 2fd698e28e4d5d..8627fda8d9930d 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -18,13 +18,13 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 	struct unwind_state state;
 	unsigned long addr;
 
-	if (regs && !consume_entry(cookie, regs->ip, false))
+	if (regs && !consume_entry(cookie, regs->ip))
 		return;
 
 	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
 	     unwind_next_frame(&state)) {
 		addr = unwind_get_return_address(&state);
-		if (!addr || !consume_entry(cookie, addr, false))
+		if (!addr || !consume_entry(cookie, addr))
 			break;
 	}
 }
@@ -72,7 +72,7 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
 		if (!addr)
 			return -EINVAL;
 
-		if (!consume_entry(cookie, addr, false))
+		if (!consume_entry(cookie, addr))
 			return -EINVAL;
 	}
 
@@ -114,7 +114,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
 {
 	const void __user *fp = (const void __user *)regs->bp;
 
-	if (!consume_entry(cookie, regs->ip, false))
+	if (!consume_entry(cookie, regs->ip))
 		return;
 
 	while (1) {
@@ -128,7 +128,7 @@ void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
 			break;
 		if (!frame.ret_addr)
 			break;
-		if (!consume_entry(cookie, frame.ret_addr, false))
+		if (!consume_entry(cookie, frame.ret_addr))
 			break;
 		fp = frame.next_fp;
 	}
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index b7af8cc13eda4f..50e2df30b0aa31 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -29,14 +29,11 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size);
  * stack_trace_consume_fn - Callback for arch_stack_walk()
  * @cookie:	Caller supplied pointer handed back by arch_stack_walk()
  * @addr:	The stack entry address to consume
- * @reliable:	True when the stack entry is reliable. Required by
- *		some printk based consumers.
  *
  * Return:	True, if the entry was consumed or skipped
  *		False, if there is no space left to store
  */
-typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr,
-				       bool reliable);
+typedef bool (*stack_trace_consume_fn)(void *cookie, unsigned long addr);
 /**
  * arch_stack_walk - Architecture specific function to walk the stack
  * @consume_entry:	Callback which is invoked by the architecture code for
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 946f44a9e86afb..9f8117c7cfddee 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -78,8 +78,7 @@ struct stacktrace_cookie {
 	unsigned int	len;
 };
 
-static bool stack_trace_consume_entry(void *cookie, unsigned long addr,
-				      bool reliable)
+static bool stack_trace_consume_entry(void *cookie, unsigned long addr)
 {
 	struct stacktrace_cookie *c = cookie;
 
@@ -94,12 +93,11 @@ static bool stack_trace_consume_entry(void *cookie, unsigned long addr,
 	return c->len < c->size;
 }
 
-static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr,
-					      bool reliable)
+static bool stack_trace_consume_entry_nosched(void *cookie, unsigned long addr)
 {
 	if (in_sched_functions(addr))
 		return true;
-	return stack_trace_consume_entry(cookie, addr, reliable);
+	return stack_trace_consume_entry(cookie, addr);
 }
 
 /**

From baa2cd417053cb674deb90ee66b88afea0517335 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 14 Sep 2020 16:34:08 +0100
Subject: [PATCH 147/262] arm64: stacktrace: Make stack walk callback
 consistent with generic code

As with the generic arch_stack_walk() code the arm64 stack walk code takes
a callback that is called per stack frame. Currently the arm64 code always
passes a struct stackframe to the callback and the generic code just passes
the pc, however none of the users ever reference anything in the struct
other than the pc value. The arm64 code also uses a return type of int
while the generic code uses a return type of bool though in both cases the
return value is a boolean value and the sense is inverted between the two.

In order to reduce code duplication when arm64 is converted to use
arch_stack_walk() change the signature and return sense of the arm64
specific callback to match that of the generic code.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Link: https://lore.kernel.org/r/20200914153409.25097-3-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/stacktrace.h |  2 +-
 arch/arm64/kernel/perf_callchain.c  |  6 +++---
 arch/arm64/kernel/return_address.c  |  8 ++++----
 arch/arm64/kernel/stacktrace.c      | 11 +++++------
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index fc7613023c1920..eb29b1fe8255eb 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -63,7 +63,7 @@ struct stackframe {
 
 extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
 extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
-			    int (*fn)(struct stackframe *, void *), void *data);
+			    bool (*fn)(void *, unsigned long), void *data);
 extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 			   const char *loglvl);
 
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index b0e03e052dd1d5..88ff471b0bce5f 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -137,11 +137,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
  * whist unwinding the stackframe and is like a subroutine return so we use
  * the PC.
  */
-static int callchain_trace(struct stackframe *frame, void *data)
+static bool callchain_trace(void *data, unsigned long pc)
 {
 	struct perf_callchain_entry_ctx *entry = data;
-	perf_callchain_store(entry, frame->pc);
-	return 0;
+	perf_callchain_store(entry, pc);
+	return true;
 }
 
 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index a5e8b3b9d79830..a6d18755652fa1 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -18,16 +18,16 @@ struct return_address_data {
 	void *addr;
 };
 
-static int save_return_addr(struct stackframe *frame, void *d)
+static bool save_return_addr(void *d, unsigned long pc)
 {
 	struct return_address_data *data = d;
 
 	if (!data->level) {
-		data->addr = (void *)frame->pc;
-		return 1;
+		data->addr = (void *)pc;
+		return false;
 	} else {
 		--data->level;
-		return 0;
+		return true;
 	}
 }
 NOKPROBE_SYMBOL(save_return_addr);
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 2dd8e3b8b94b76..05eaba21fd4683 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -118,12 +118,12 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 NOKPROBE_SYMBOL(unwind_frame);
 
 void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
-		     int (*fn)(struct stackframe *, void *), void *data)
+			     bool (*fn)(void *, unsigned long), void *data)
 {
 	while (1) {
 		int ret;
 
-		if (fn(frame, data))
+		if (!fn(data, frame->pc))
 			break;
 		ret = unwind_frame(tsk, frame);
 		if (ret < 0)
@@ -139,17 +139,16 @@ struct stack_trace_data {
 	unsigned int skip;
 };
 
-static int save_trace(struct stackframe *frame, void *d)
+static bool save_trace(void *d, unsigned long addr)
 {
 	struct stack_trace_data *data = d;
 	struct stack_trace *trace = data->trace;
-	unsigned long addr = frame->pc;
 
 	if (data->no_sched_functions && in_sched_functions(addr))
-		return 0;
+		return false;
 	if (data->skip) {
 		data->skip--;
-		return 0;
+		return false;
 	}
 
 	trace->entries[trace->nr_entries++] = addr;

From 5fc57df2f6fdc18b9f9273f98318ff7919968c2c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 14 Sep 2020 16:34:09 +0100
Subject: [PATCH 148/262] arm64: stacktrace: Convert to ARCH_STACKWALK

Historically architectures have had duplicated code in their stack trace
implementations for filtering what gets traced. In order to avoid this
duplication some generic code has been provided using a new interface
arch_stack_walk(), enabled by selecting ARCH_STACKWALK in Kconfig, which
factors all this out into the generic stack trace code. Convert arm64
to use this common infrastructure.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Link: https://lore.kernel.org/r/20200914153409.25097-4-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig             |  1 +
 arch/arm64/kernel/stacktrace.c | 79 +++++-----------------------------
 2 files changed, 11 insertions(+), 69 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6d232837cbeee8..d1ba52e4b976c0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -29,6 +29,7 @@ config ARM64
 	select ARCH_HAS_SETUP_DMA_OPS
 	select ARCH_HAS_SET_DIRECT_MAP
 	select ARCH_HAS_SET_MEMORY
+	select ARCH_STACKWALK
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 05eaba21fd4683..804d076b02cb81 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -133,82 +133,23 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 NOKPROBE_SYMBOL(walk_stackframe);
 
 #ifdef CONFIG_STACKTRACE
-struct stack_trace_data {
-	struct stack_trace *trace;
-	unsigned int no_sched_functions;
-	unsigned int skip;
-};
 
-static bool save_trace(void *d, unsigned long addr)
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+		     struct task_struct *task, struct pt_regs *regs)
 {
-	struct stack_trace_data *data = d;
-	struct stack_trace *trace = data->trace;
-
-	if (data->no_sched_functions && in_sched_functions(addr))
-		return false;
-	if (data->skip) {
-		data->skip--;
-		return false;
-	}
-
-	trace->entries[trace->nr_entries++] = addr;
-
-	return trace->nr_entries >= trace->max_entries;
-}
-
-void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
-{
-	struct stack_trace_data data;
-	struct stackframe frame;
-
-	data.trace = trace;
-	data.skip = trace->skip;
-	data.no_sched_functions = 0;
-
-	start_backtrace(&frame, regs->regs[29], regs->pc);
-	walk_stackframe(current, &frame, save_trace, &data);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_regs);
-
-static noinline void __save_stack_trace(struct task_struct *tsk,
-	struct stack_trace *trace, unsigned int nosched)
-{
-	struct stack_trace_data data;
 	struct stackframe frame;
 
-	if (!try_get_task_stack(tsk))
-		return;
-
-	data.trace = trace;
-	data.skip = trace->skip;
-	data.no_sched_functions = nosched;
-
-	if (tsk != current) {
-		start_backtrace(&frame, thread_saved_fp(tsk),
-				thread_saved_pc(tsk));
-	} else {
-		/* We don't want this function nor the caller */
-		data.skip += 2;
+	if (regs)
+		start_backtrace(&frame, regs->regs[29], regs->pc);
+	else if (task == current)
 		start_backtrace(&frame,
 				(unsigned long)__builtin_frame_address(0),
-				(unsigned long)__save_stack_trace);
-	}
-
-	walk_stackframe(tsk, &frame, save_trace, &data);
+				(unsigned long)arch_stack_walk);
+	else
+		start_backtrace(&frame, thread_saved_fp(task),
+				thread_saved_pc(task));
 
-	put_task_stack(tsk);
-}
-
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
-	__save_stack_trace(tsk, trace, 1);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-
-void save_stack_trace(struct stack_trace *trace)
-{
-	__save_stack_trace(current, trace, 0);
+	walk_stackframe(task, &frame, consume_entry, cookie);
 }
 
-EXPORT_SYMBOL_GPL(save_stack_trace);
 #endif

From c6b90d5cf637ac1186ca25ec1f9e410455a0ca7d Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Tue, 15 Sep 2020 16:19:59 +0800
Subject: [PATCH 149/262] arm64/fpsimd: Fix missing-prototypes in fpsimd.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following warnings.
arch/arm64/kernel/fpsimd.c:935:6: warning: no previous prototype for
‘do_sve_acc’ [-Wmissing-prototypes]
arch/arm64/kernel/fpsimd.c:962:6: warning: no previous prototype for
‘do_fpsimd_acc’ [-Wmissing-prototypes]
arch/arm64/kernel/fpsimd.c:971:6: warning: no previous prototype for
‘do_fpsimd_exc’ [-Wmissing-prototypes]
arch/arm64/kernel/fpsimd.c:1266:6: warning: no previous prototype for
‘kernel_neon_begin’ [-Wmissing-prototypes]
arch/arm64/kernel/fpsimd.c:1292:6: warning: no previous prototype for
‘kernel_neon_end’ [-Wmissing-prototypes]

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/1600157999-14802-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/fpsimd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 55c8f3ec6705d0..0a637e373226ad 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -32,9 +32,11 @@
 #include <linux/swab.h>
 
 #include <asm/esr.h>
+#include <asm/exception.h>
 #include <asm/fpsimd.h>
 #include <asm/cpufeature.h>
 #include <asm/cputype.h>
+#include <asm/neon.h>
 #include <asm/processor.h>
 #include <asm/simd.h>
 #include <asm/sigcontext.h>

From 152d75d66428afc636ec34bcef8c90a4aa2a7848 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Wed, 16 Sep 2020 10:20:47 +0800
Subject: [PATCH 150/262] arm64: mm: Fix missing-prototypes in pageattr.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following warnings.
‘set_memory_valid’ [-Wmissing-prototypes]
int set_memory_valid(unsigned long addr, int numpages, int enable)
     ^
‘set_direct_map_invalid_noflush’ [-Wmissing-prototypes]
int set_direct_map_invalid_noflush(struct page *page)
     ^
‘set_direct_map_default_noflush’ [-Wmissing-prototypes]
int set_direct_map_default_noflush(struct page *page)
     ^

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Link: https://lore.kernel.org/r/1600222847-56792-1-git-send-email-tiantao6@hisilicon.com
arch/arm64/mm/pageattr.c:138:5: warning: no previous prototype for
arch/arm64/mm/pageattr.c:150:5: warning: no previous prototype for
arch/arm64/mm/pageattr.c:165:5: warning: no previous prototype for
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/pageattr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 23f648c2a19987..1b94f5b8265451 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
 
+#include <asm/cacheflush.h>
 #include <asm/set_memory.h>
 #include <asm/tlbflush.h>
 

From a76b8236edcf5b785d044b930f9e14ad02b4a484 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Tue, 15 Sep 2020 16:41:09 -0400
Subject: [PATCH 151/262] drivers/perf: xgene_pmu: Fix uninitialized resource
 struct

This splat was reported on newer Fedora kernels booting on certain
X-gene based machines:

 xgene-pmu APMC0D83:00: X-Gene PMU version 3
 Unable to handle kernel read from unreadable memory at virtual \
 address 0000000000004006
 ...
 Call trace:
  string+0x50/0x100
  vsnprintf+0x160/0x750
  devm_kvasprintf+0x5c/0xb4
  devm_kasprintf+0x54/0x60
  __devm_ioremap_resource+0xdc/0x1a0
  devm_ioremap_resource+0x14/0x20
  acpi_get_pmu_hw_inf.isra.0+0x84/0x15c
  acpi_pmu_dev_add+0xbc/0x21c
  acpi_ns_walk_namespace+0x16c/0x1e4
  acpi_walk_namespace+0xb4/0xfc
  xgene_pmu_probe_pmu_dev+0x7c/0xe0
  xgene_pmu_probe.part.0+0x2c0/0x310
  xgene_pmu_probe+0x54/0x64
  platform_drv_probe+0x60/0xb4
  really_probe+0xe8/0x4a0
  driver_probe_device+0xe4/0x100
  device_driver_attach+0xcc/0xd4
  __driver_attach+0xb0/0x17c
  bus_for_each_dev+0x6c/0xb0
  driver_attach+0x30/0x40
  bus_add_driver+0x154/0x250
  driver_register+0x84/0x140
  __platform_driver_register+0x54/0x60
  xgene_pmu_driver_init+0x28/0x34
  do_one_initcall+0x40/0x204
  do_initcalls+0x104/0x144
  kernel_init_freeable+0x198/0x210
  kernel_init+0x20/0x12c
  ret_from_fork+0x10/0x18
 Code: 91000400 110004e1 eb08009f 540000c0 (38646846)
 ---[ end trace f08c10566496a703 ]---

This is due to use of an uninitialized local resource struct in the xgene
pmu driver. The thunderx2_pmu driver avoids this by using the resource list
constructed by acpi_dev_get_resources() rather than using a callback from
that function. The callback in the xgene driver didn't fully initialize
the resource. So get rid of the callback and search the resource list as
done by thunderx2.

Fixes: 832c927d119b ("perf: xgene: Add APM X-Gene SoC Performance Monitoring Unit driver")
Signed-off-by: Mark Salter <msalter@redhat.com>
Link: https://lore.kernel.org/r/20200915204110.326138-1-msalter@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/xgene_pmu.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index edac28cd25ddc3..633cf07ba6723e 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1453,17 +1453,6 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
 }
 
 #if defined(CONFIG_ACPI)
-static int acpi_pmu_dev_add_resource(struct acpi_resource *ares, void *data)
-{
-	struct resource *res = data;
-
-	if (ares->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32)
-		acpi_dev_resource_memory(ares, res);
-
-	/* Always tell the ACPI core to skip this resource */
-	return 1;
-}
-
 static struct
 xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
 				       struct acpi_device *adev, u32 type)
@@ -1475,6 +1464,7 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
 	struct hw_pmu_info *inf;
 	void __iomem *dev_csr;
 	struct resource res;
+	struct resource_entry *rentry;
 	int enable_bit;
 	int rc;
 
@@ -1483,11 +1473,23 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
 		return NULL;
 
 	INIT_LIST_HEAD(&resource_list);
-	rc = acpi_dev_get_resources(adev, &resource_list,
-				    acpi_pmu_dev_add_resource, &res);
+	rc = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+	if (rc <= 0) {
+		dev_err(dev, "PMU type %d: No resources found\n", type);
+		return NULL;
+	}
+
+	list_for_each_entry(rentry, &resource_list, node) {
+		if (resource_type(rentry->res) == IORESOURCE_MEM) {
+			res = *rentry->res;
+			rentry = NULL;
+			break;
+		}
+	}
 	acpi_dev_free_resource_list(&resource_list);
-	if (rc < 0) {
-		dev_err(dev, "PMU type %d: No resource address found\n", type);
+
+	if (rentry) {
+		dev_err(dev, "PMU type %d: No memory resource found\n", type);
 		return NULL;
 	}
 

From 688494a407d1419a6b158c644b262c61cde39f48 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Tue, 15 Sep 2020 16:41:10 -0400
Subject: [PATCH 152/262] drivers/perf: thunderx2_pmu: Fix memory resource
 error handling

In tx2_uncore_pmu_init_dev(), a call to acpi_dev_get_resources() is used
to create a list _CRS resources which is searched for the device base
address. There is an error check following this:

   if (!rentry->res)
           return NULL

In no case, will rentry->res be NULL, so the test is useless. Even
if the test worked, it comes before the resource list memory is
freed. None of this really matters as long as the ACPI table has
the memory resource. Let's clean it up so that it makes sense and
will give a meaningful error should firmware leave out the memory
resource.

Fixes: 69c32972d593 ("drivers/perf: Add Cavium ThunderX2 SoC UNCORE PMU driver")
Signed-off-by: Mark Salter <msalter@redhat.com>
Link: https://lore.kernel.org/r/20200915204110.326138-2-msalter@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/thunderx2_pmu.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index aac9823b0c6bbd..e116815fa8092f 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -805,14 +805,17 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
 	list_for_each_entry(rentry, &list, node) {
 		if (resource_type(rentry->res) == IORESOURCE_MEM) {
 			res = *rentry->res;
+			rentry = NULL;
 			break;
 		}
 	}
+	acpi_dev_free_resource_list(&list);
 
-	if (!rentry->res)
+	if (rentry) {
+		dev_err(dev, "PMU type %d: Fail to find resource\n", type);
 		return NULL;
+	}
 
-	acpi_dev_free_resource_list(&list);
 	base = devm_ioremap_resource(dev, &res);
 	if (IS_ERR(base)) {
 		dev_err(dev, "PMU type %d: Fail to map resource\n", type);

From 0fdb64c2a303e4d2562245501920241c0e507951 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Sep 2020 15:48:09 +0100
Subject: [PATCH 153/262] arm64: Improve diagnostics when trapping BRK with
 FAULT_BRK_IMM

When generating instructions at runtime, for example due to kernel text
patching or the BPF JIT, we can emit a trapping BRK instruction if we
are asked to encode an invalid instruction such as an out-of-range]
branch. This is indicative of a bug in the caller, and will result in a
crash on executing the generated code. Unfortunately, the message from
the crash is really unhelpful, and mumbles something about ptrace:

  | Unexpected kernel BRK exception at EL1
  | Internal error: ptrace BRK handler: f2000100 [#1] SMP

We can do better than this. Install a break handler for FAULT_BRK_IMM,
which is the immediate used to encode the "I've been asked to generate
an invalid instruction" error, and triage the faulting PC to determine
whether or not the failure occurred in the BPF JIT.

Link: https://lore.kernel.org/r/20200915141707.GB26439@willie-the-truck
Reported-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/extable.h   |  9 +++++++++
 arch/arm64/kernel/debug-monitors.c |  2 +-
 arch/arm64/kernel/traps.c          | 17 +++++++++++++++++
 arch/arm64/mm/extable.c            |  4 +---
 4 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
index 840a35ed92ec8d..b15eb4a3e6b208 100644
--- a/arch/arm64/include/asm/extable.h
+++ b/arch/arm64/include/asm/extable.h
@@ -22,6 +22,15 @@ struct exception_table_entry
 
 #define ARCH_HAS_RELATIVE_EXTABLE
 
+static inline bool in_bpf_jit(struct pt_regs *regs)
+{
+	if (!IS_ENABLED(CONFIG_BPF_JIT))
+		return false;
+
+	return regs->pc >= BPF_JIT_REGION_START &&
+	       regs->pc < BPF_JIT_REGION_END;
+}
+
 #ifdef CONFIG_BPF_JIT
 int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
 			      struct pt_regs *regs);
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 7310a4f7f9931a..fa76151de6ff14 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -384,7 +384,7 @@ void __init debug_traps_init(void)
 	hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
 			      TRAP_TRACE, "single-step handler");
 	hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
-			      TRAP_BRKPT, "ptrace BRK handler");
+			      TRAP_BRKPT, "BRK handler");
 }
 
 /* Re-enable single step for syscall restarting. */
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 13ebd5ca20706a..00e92c9f338c05 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -34,6 +34,7 @@
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
+#include <asm/extable.h>
 #include <asm/insn.h>
 #include <asm/kprobes.h>
 #include <asm/traps.h>
@@ -994,6 +995,21 @@ static struct break_hook bug_break_hook = {
 	.imm = BUG_BRK_IMM,
 };
 
+static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr)
+{
+	pr_err("%s generated an invalid instruction at %pS!\n",
+		in_bpf_jit(regs) ? "BPF JIT" : "Kernel text patching",
+		(void *)instruction_pointer(regs));
+
+	/* We cannot handle this */
+	return DBG_HOOK_ERROR;
+}
+
+static struct break_hook fault_break_hook = {
+	.fn = reserved_fault_handler,
+	.imm = FAULT_BRK_IMM,
+};
+
 #ifdef CONFIG_KASAN_SW_TAGS
 
 #define KASAN_ESR_RECOVER	0x20
@@ -1059,6 +1075,7 @@ int __init early_brk64(unsigned long addr, unsigned int esr,
 void __init trap_init(void)
 {
 	register_kernel_break_hook(&bug_break_hook);
+	register_kernel_break_hook(&fault_break_hook);
 #ifdef CONFIG_KASAN_SW_TAGS
 	register_kernel_break_hook(&kasan_break_hook);
 #endif
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index eee1732ab6cd38..aa0060178343a8 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -14,9 +14,7 @@ int fixup_exception(struct pt_regs *regs)
 	if (!fixup)
 		return 0;
 
-	if (IS_ENABLED(CONFIG_BPF_JIT) &&
-	    regs->pc >= BPF_JIT_REGION_START &&
-	    regs->pc < BPF_JIT_REGION_END)
+	if (in_bpf_jit(regs))
 		return arm64_bpf_fixup_exception(fixup, regs);
 
 	regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;

From e1ebb2b49048c4767cfa0d8466f9c701e549fa5e Mon Sep 17 00:00:00 2001
From: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Date: Thu, 17 Sep 2020 21:20:38 +0000
Subject: [PATCH 154/262] KVM: SVM: Don't flush cache if hardware enforces
 cache coherency across encryption domains

In some hardware implementations, coherency between the encrypted and
unencrypted mappings of the same physical page in a VM is enforced. In
such a system, it is not required for software to flush the VM's page
from all CPU caches in the system prior to changing the value of the
C-bit for the page.

So check that bit before flushing the cache.

Signed-off-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lkml.kernel.org/r/20200917212038.5090-4-krish.sadhukhan@oracle.com
---
 arch/x86/kvm/svm/sev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 402dc4234e3978..567792fbbc9fa7 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -384,7 +384,8 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
 	uint8_t *page_virtual;
 	unsigned long i;
 
-	if (npages == 0 || pages == NULL)
+	if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
+	    pages == NULL)
 		return;
 
 	for (i = 0; i < npages; i++) {

From f186a84d8abecc7dfbd5bbd0a8ad0358078e4767 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@arm.com>
Date: Fri, 28 Aug 2020 19:11:48 +0100
Subject: [PATCH 155/262] arm64/fpsimd: Update documentation of do_sve_acc

fpsimd_restore_current_state() enables and disables the SVE access trap
based on TIF_SVE, not task_fpsimd_load(). Update the documentation of
do_sve_acc to reflect this behavior.

Signed-off-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20200828181155.17745-2-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/fpsimd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 55c8f3ec6705d0..1c6a82083d5c3a 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -928,7 +928,7 @@ void fpsimd_release_task(struct task_struct *dead_task)
  * the SVE access trap will be disabled the next time this task
  * reaches ret_to_user.
  *
- * TIF_SVE should be clear on entry: otherwise, task_fpsimd_load()
+ * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
  * would have disabled the SVE access trap for userspace during
  * ret_to_user, making an SVE access trap impossible in that case.
  */

From 68a4c52e55e024ad946779f20d50ab31860eb0cc Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@arm.com>
Date: Fri, 28 Aug 2020 19:11:49 +0100
Subject: [PATCH 156/262] arm64/signal: Update the comment in
 preserve_sve_context

The SVE state is saved by fpsimd_signal_preserve_current_state() and not
preserve_fpsimd_context(). Update the comment in preserve_sve_context to
reflect the current behavior.

Signed-off-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20200828181155.17745-3-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/signal.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 3b4f31f35e4585..f3af68dc1cf826 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -244,7 +244,8 @@ static int preserve_sve_context(struct sve_context __user *ctx)
 	if (vq) {
 		/*
 		 * This assumes that the SVE state has already been saved to
-		 * the task struct by calling preserve_fpsimd_context().
+		 * the task struct by calling the function
+		 * fpsimd_signal_preserve_current_state().
 		 */
 		err |= __copy_to_user((char __user *)ctx + SVE_SIG_REGS_OFFSET,
 				      current->thread.sve_state,

From 315cf047d230a363515bedce177cfbf460cbb2d6 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@arm.com>
Date: Fri, 28 Aug 2020 19:11:51 +0100
Subject: [PATCH 157/262] arm64/fpsimdmacros: Introduce a macro to update
 ZCR_EL1.LEN

A follow-up patch will need to update ZCR_EL1.LEN.

Add a macro that could be re-used in the current and new places to
avoid code duplication.

Signed-off-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20200828181155.17745-5-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/fpsimdmacros.h | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 636e9d9c7929c2..60e29a3e41c513 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -183,6 +183,17 @@
 	.purgem _for__body
 .endm
 
+/* Update ZCR_EL1.LEN with the new VQ */
+.macro sve_load_vq xvqminus1, xtmp, xtmp2
+		mrs_s		\xtmp, SYS_ZCR_EL1
+		bic		\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
+		orr		\xtmp2, \xtmp2, \xvqminus1
+		cmp		\xtmp2, \xtmp
+		b.eq		921f
+		msr_s		SYS_ZCR_EL1, \xtmp2	//self-synchronising
+921:
+.endm
+
 .macro sve_save nxbase, xpfpsr, nxtmp
  _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
  _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
@@ -197,13 +208,7 @@
 .endm
 
 .macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
-		mrs_s		x\nxtmp, SYS_ZCR_EL1
-		bic		\xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK
-		orr		\xtmp2, \xtmp2, \xvqminus1
-		cmp		\xtmp2, x\nxtmp
-		b.eq		921f
-		msr_s		SYS_ZCR_EL1, \xtmp2	// self-synchronising
-921:
+		sve_load_vq	\xvqminus1, x\nxtmp, \xtmp2
  _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
 		_sve_ldr_p	0, \nxbase
 		_sve_wrffr	0

From 6d40f05fad0babfb3e991edb2d0bd28a30a2f10c Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@arm.com>
Date: Fri, 28 Aug 2020 19:11:50 +0100
Subject: [PATCH 158/262] arm64/fpsimdmacros: Allow the macro "for" to be used
 in more cases

The current version of the macro "for" is not able to work when the
counter is used to generate registers using mnemonics. This is because
gas is not able to evaluate the expression generated if used in
register's name (i.e x\n).

Gas offers a way to evaluate macro arguments by using % in front of
them under the alternate macro mode.

The implementation of "for" is updated to use the alternate macro mode
and %, so we can use the macro in more cases. As the alternate macro
mode may have side-effects, this is disabled when expanding the body.

While it is enough to prefix the argument of the macro "__for_body"
with %, the arguments of "__for" are also prefixed to get a more
bearable value in case of compilation error.

Suggested-by: Dave Martin <dave.martin@arm.com>
Signed-off-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20200828181155.17745-4-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/fpsimdmacros.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 60e29a3e41c513..feef5b371fba66 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -166,19 +166,23 @@
 
 .macro __for from:req, to:req
 	.if (\from) == (\to)
-		_for__body \from
+		_for__body %\from
 	.else
-		__for \from, (\from) + ((\to) - (\from)) / 2
-		__for (\from) + ((\to) - (\from)) / 2 + 1, \to
+		__for %\from, %((\from) + ((\to) - (\from)) / 2)
+		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
 	.endif
 .endm
 
 .macro _for var:req, from:req, to:req, insn:vararg
 	.macro _for__body \var:req
+		.noaltmacro
 		\insn
+		.altmacro
 	.endm
 
+	.altmacro
 	__for \from, \to
+	.noaltmacro
 
 	.purgem _for__body
 .endm

From 1e530f1352a2709d7bcacdb8b6d42c8900ba2c80 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@arm.com>
Date: Fri, 28 Aug 2020 19:11:52 +0100
Subject: [PATCH 159/262] arm64/sve: Implement a helper to flush SVE registers

Introduce a new helper that will zero all SVE registers but the first
128-bits of each vector. This will be used by subsequent patches to
avoid costly store/maipulate/reload sequences in places like do_sve_acc().

Signed-off-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20200828181155.17745-6-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/fpsimd.h       |  1 +
 arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++
 arch/arm64/kernel/entry-fpsimd.S      |  8 ++++++++
 3 files changed, 28 insertions(+)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 59f10dd13f121e..958f642e930dcb 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -69,6 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread)
 extern void sve_save_state(void *state, u32 *pfpsr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
 			   unsigned long vq_minus_1);
+extern void sve_flush_live(void);
 extern unsigned int sve_get_vl(void);
 
 struct arm64_cpu_capabilities;
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index feef5b371fba66..af43367534c7a5 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -164,6 +164,13 @@
 		| ((\np) << 5)
 .endm
 
+/* PFALSE P\np.B */
+.macro _sve_pfalse np
+	_sve_check_preg \np
+	.inst	0x2518e400			\
+		| (\np)
+.endm
+
 .macro __for from:req, to:req
 	.if (\from) == (\to)
 		_for__body %\from
@@ -198,6 +205,18 @@
 921:
 .endm
 
+/* Preserve the first 128-bits of Znz and zero the rest. */
+.macro _sve_flush_z nz
+	_sve_check_zreg \nz
+	mov	v\nz\().16b, v\nz\().16b
+.endm
+
+.macro sve_flush
+ _for n, 0, 31, _sve_flush_z	\n
+ _for n, 0, 15, _sve_pfalse	\n
+		_sve_wrffr	0
+.endm
+
 .macro sve_save nxbase, xpfpsr, nxtmp
  _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
  _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index f880dd63ddc380..fdf7a5a35c6325 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -32,6 +32,7 @@ SYM_FUNC_START(fpsimd_load_state)
 SYM_FUNC_END(fpsimd_load_state)
 
 #ifdef CONFIG_ARM64_SVE
+
 SYM_FUNC_START(sve_save_state)
 	sve_save 0, x1, 2
 	ret
@@ -46,4 +47,11 @@ SYM_FUNC_START(sve_get_vl)
 	_sve_rdvl	0, 1
 	ret
 SYM_FUNC_END(sve_get_vl)
+
+/* Zero all SVE registers but the first 128-bits of each vector */
+SYM_FUNC_START(sve_flush_live)
+	sve_flush
+	ret
+SYM_FUNC_END(sve_flush_live)
+
 #endif /* CONFIG_ARM64_SVE */

From 9c4b4c701e53183df94f6b9802b63762f0c0e1e3 Mon Sep 17 00:00:00 2001
From: Julien Grall <julien.grall@arm.com>
Date: Fri, 28 Aug 2020 19:11:53 +0100
Subject: [PATCH 160/262] arm64/sve: Implement a helper to load SVE registers
 from FPSIMD state

In a follow-up patch, we may save the FPSIMD rather than the full SVE
state when the state has to be zeroed on return to userspace (e.g
during a syscall).

Introduce an helper to load SVE vectors from FPSIMD state and zero the rest
of SVE registers.

Signed-off-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Link: https://lore.kernel.org/r/20200828181155.17745-7-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/fpsimd.h  |  2 ++
 arch/arm64/kernel/entry-fpsimd.S | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 958f642e930dcb..bec5f14b622ae2 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -70,6 +70,8 @@ extern void sve_save_state(void *state, u32 *pfpsr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
 			   unsigned long vq_minus_1);
 extern void sve_flush_live(void);
+extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
+				       unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
 
 struct arm64_cpu_capabilities;
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index fdf7a5a35c6325..2ca395c25448f4 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -48,6 +48,23 @@ SYM_FUNC_START(sve_get_vl)
 	ret
 SYM_FUNC_END(sve_get_vl)
 
+/*
+ * Load SVE state from FPSIMD state.
+ *
+ * x0 = pointer to struct fpsimd_state
+ * x1 = VQ - 1
+ *
+ * Each SVE vector will be loaded with the first 128-bits taken from FPSIMD
+ * and the rest zeroed. All the other SVE registers will be zeroed.
+ */
+SYM_FUNC_START(sve_load_from_fpsimd_state)
+		sve_load_vq	x1, x2, x3
+		fpsimd_restore	x0, 8
+ _for n, 0, 15, _sve_pfalse	\n
+		_sve_wrffr	0
+		ret
+SYM_FUNC_END(sve_load_from_fpsimd_state)
+
 /* Zero all SVE registers but the first 128-bits of each vector */
 SYM_FUNC_START(sve_flush_live)
 	sve_flush

From b11483ef5a502663732c6ca1b58d14ff9eedd6f7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 16 Jul 2020 17:11:08 +0100
Subject: [PATCH 161/262] arm64: Make use of ARCH_WORKAROUND_1 even when KVM is
 not enabled

We seem to be pretending that we don't have any firmware mitigation
when KVM is not compiled in, which is not quite expected.

Bring back the mitigation in this case.

Fixes: 4db61fef16a1 ("arm64: kvm: Modernize __smccc_workaround_1_smc_start annotations")
Cc: <stable@vger.kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpu_errata.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index c332d49780dc96..88966496806ae3 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -234,14 +234,17 @@ static int detect_harden_bp_fw(void)
 		smccc_end = NULL;
 		break;
 
-#if IS_ENABLED(CONFIG_KVM)
 	case SMCCC_CONDUIT_SMC:
 		cb = call_smc_arch_workaround_1;
+#if IS_ENABLED(CONFIG_KVM)
 		smccc_start = __smccc_workaround_1_smc;
 		smccc_end = __smccc_workaround_1_smc +
 			__SMCCC_WORKAROUND_1_SMC_SZ;
-		break;
+#else
+		smccc_start = NULL;
+		smccc_end = NULL;
 #endif
+		break;
 
 	default:
 		return -1;

From 18fce56134c987e5b4eceddafdbe4b00c07e2ae1 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 16 Jul 2020 17:11:09 +0100
Subject: [PATCH 162/262] arm64: Run ARCH_WORKAROUND_1 enabling code on all
 CPUs

Commit 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack
thereof") changed the way we deal with ARCH_WORKAROUND_1, by moving most
of the enabling code to the .matches() callback.

This has the unfortunate effect that the workaround gets only enabled on
the first affected CPU, and no other.

In order to address this, forcefully call the .matches() callback from a
.cpu_enable() callback, which brings us back to the original behaviour.

Fixes: 73f381660959 ("arm64: Advertise mitigation of Spectre-v2, or lack thereof")
Cc: <stable@vger.kernel.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpu_errata.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 88966496806ae3..3fe64bf5a58d40 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -599,6 +599,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
 	return (need_wa > 0);
 }
 
+static void
+cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap)
+{
+	cap->matches(cap, SCOPE_LOCAL_CPU);
+}
+
 static const __maybe_unused struct midr_range tx2_family_cpus[] = {
 	MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
 	MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
@@ -890,9 +896,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	},
 #endif
 	{
+		.desc = "Branch predictor hardening",
 		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
 		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
 		.matches = check_branch_predictor,
+		.cpu_enable = cpu_enable_branch_predictor_hardening,
 	},
 #ifdef CONFIG_RANDOMIZE_BASE
 	{

From 9e0f085c2b33ebe13bcec53cbacce505fe78fde7 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Mon, 21 Sep 2020 13:23:41 +0100
Subject: [PATCH 163/262] arm64: Move console stack display code to
 stacktrace.c

Currently the code for displaying a stack trace on the console is located
in traps.c rather than stacktrace.c, using the unwinding code that is in
stacktrace.c. This can be confusing and make the code hard to find since
such output is often referred to as a stack trace which might mislead the
unwary. Due to this and since traps.c doesn't interact with this code
except for via the public interfaces move the code to stacktrace.c to
make it easier to find.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20200921122341.11280-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/stacktrace.c | 65 ++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/traps.c      | 65 ----------------------------------
 2 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 804d076b02cb81..fa56af1a59c39f 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -132,6 +132,71 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 }
 NOKPROBE_SYMBOL(walk_stackframe);
 
+static void dump_backtrace_entry(unsigned long where, const char *loglvl)
+{
+	printk("%s %pS\n", loglvl, (void *)where);
+}
+
+void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
+		    const char *loglvl)
+{
+	struct stackframe frame;
+	int skip = 0;
+
+	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+
+	if (regs) {
+		if (user_mode(regs))
+			return;
+		skip = 1;
+	}
+
+	if (!tsk)
+		tsk = current;
+
+	if (!try_get_task_stack(tsk))
+		return;
+
+	if (tsk == current) {
+		start_backtrace(&frame,
+				(unsigned long)__builtin_frame_address(0),
+				(unsigned long)dump_backtrace);
+	} else {
+		/*
+		 * task blocked in __switch_to
+		 */
+		start_backtrace(&frame,
+				thread_saved_fp(tsk),
+				thread_saved_pc(tsk));
+	}
+
+	printk("%sCall trace:\n", loglvl);
+	do {
+		/* skip until specified stack frame */
+		if (!skip) {
+			dump_backtrace_entry(frame.pc, loglvl);
+		} else if (frame.fp == regs->regs[29]) {
+			skip = 0;
+			/*
+			 * Mostly, this is the case where this function is
+			 * called in panic/abort. As exception handler's
+			 * stack frame does not contain the corresponding pc
+			 * at which an exception has taken place, use regs->pc
+			 * instead.
+			 */
+			dump_backtrace_entry(regs->pc, loglvl);
+		}
+	} while (!unwind_frame(tsk, &frame));
+
+	put_task_stack(tsk);
+}
+
+void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
+{
+	dump_backtrace(NULL, tsk, loglvl);
+	barrier();
+}
+
 #ifdef CONFIG_STACKTRACE
 
 void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 13ebd5ca20706a..77dc8e7701ed4c 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -53,11 +53,6 @@ static const char *handler[]= {
 
 int show_unhandled_signals = 0;
 
-static void dump_backtrace_entry(unsigned long where, const char *loglvl)
-{
-	printk("%s %pS\n", loglvl, (void *)where);
-}
-
 static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
 {
 	unsigned long addr = instruction_pointer(regs);
@@ -83,66 +78,6 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
 	printk("%sCode: %s\n", lvl, str);
 }
 
-void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
-		    const char *loglvl)
-{
-	struct stackframe frame;
-	int skip = 0;
-
-	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
-
-	if (regs) {
-		if (user_mode(regs))
-			return;
-		skip = 1;
-	}
-
-	if (!tsk)
-		tsk = current;
-
-	if (!try_get_task_stack(tsk))
-		return;
-
-	if (tsk == current) {
-		start_backtrace(&frame,
-				(unsigned long)__builtin_frame_address(0),
-				(unsigned long)dump_backtrace);
-	} else {
-		/*
-		 * task blocked in __switch_to
-		 */
-		start_backtrace(&frame,
-				thread_saved_fp(tsk),
-				thread_saved_pc(tsk));
-	}
-
-	printk("%sCall trace:\n", loglvl);
-	do {
-		/* skip until specified stack frame */
-		if (!skip) {
-			dump_backtrace_entry(frame.pc, loglvl);
-		} else if (frame.fp == regs->regs[29]) {
-			skip = 0;
-			/*
-			 * Mostly, this is the case where this function is
-			 * called in panic/abort. As exception handler's
-			 * stack frame does not contain the corresponding pc
-			 * at which an exception has taken place, use regs->pc
-			 * instead.
-			 */
-			dump_backtrace_entry(regs->pc, loglvl);
-		}
-	} while (!unwind_frame(tsk, &frame));
-
-	put_task_stack(tsk);
-}
-
-void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
-{
-	dump_backtrace(NULL, tsk, loglvl);
-	barrier();
-}
-
 #ifdef CONFIG_PREEMPT
 #define S_PREEMPT " PREEMPT"
 #elif defined(CONFIG_PREEMPT_RT)

From a194c5f2d2b3a05428805146afcabe5140b5d378 Mon Sep 17 00:00:00 2001
From: Zhengyuan Liu <liuzhengyuan@tj.kylinos.cn>
Date: Mon, 21 Sep 2020 10:39:36 +0800
Subject: [PATCH 164/262] arm64/mm: return cpu_all_mask when node is
 NUMA_NO_NODE

The @node passed to cpumask_of_node() can be NUMA_NO_NODE, in that
case it will trigger the following WARN_ON(node >= nr_node_ids) due to
mismatched data types of @node and @nr_node_ids. Actually we should
return cpu_all_mask just like most other architectures do if passed
NUMA_NO_NODE.

Also add a similar check to the inline cpumask_of_node() in numa.h.

Signed-off-by: Zhengyuan Liu <liuzhengyuan@tj.kylinos.cn>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Link: https://lore.kernel.org/r/20200921023936.21846-1-liuzhengyuan@tj.kylinos.cn
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/numa.h | 3 +++
 arch/arm64/mm/numa.c          | 6 +++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
index 626ad01e83bf01..dd870390d639f2 100644
--- a/arch/arm64/include/asm/numa.h
+++ b/arch/arm64/include/asm/numa.h
@@ -25,6 +25,9 @@ const struct cpumask *cpumask_of_node(int node);
 /* Returns a pointer to the cpumask of CPUs on Node 'node'. */
 static inline const struct cpumask *cpumask_of_node(int node)
 {
+	if (node == NUMA_NO_NODE)
+		return cpu_all_mask;
+
 	return node_to_cpumask_map[node];
 }
 #endif
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 73f8b49d485c2c..88e51aade0da07 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -46,7 +46,11 @@ EXPORT_SYMBOL(node_to_cpumask_map);
  */
 const struct cpumask *cpumask_of_node(int node)
 {
-	if (WARN_ON(node >= nr_node_ids))
+
+	if (node == NUMA_NO_NODE)
+		return cpu_all_mask;
+
+	if (WARN_ON(node < 0 || node >= nr_node_ids))
 		return cpu_none_mask;
 
 	if (WARN_ON(node_to_cpumask_map[node] == NULL))

From 1ef5423a55c2ac6f1361811efe75b6e46d1023ed Mon Sep 17 00:00:00 2001
From: Mike Hommey <mh@glandium.org>
Date: Tue, 22 Sep 2020 06:56:38 +0900
Subject: [PATCH 165/262] x86/fpu: Handle FPU-related and clearcpuid command
 line arguments earlier

FPU initialization handles them currently. However, in the case
of clearcpuid=, some other early initialization code may check for
features before the FPU initialization code is called. Handling the
argument earlier allows the command line to influence those early
initializations.

Signed-off-by: Mike Hommey <mh@glandium.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200921215638.37980-1-mh@glandium.org
---
 arch/x86/kernel/cpu/common.c | 55 ++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/fpu/init.c   | 55 ------------------------------------
 2 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c5d6f17d9b9d38..3c7519398ad5ca 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -23,6 +23,7 @@
 #include <linux/syscore_ops.h>
 #include <linux/pgtable.h>
 
+#include <asm/cmdline.h>
 #include <asm/stackprotector.h>
 #include <asm/perf_event.h>
 #include <asm/mmu_context.h>
@@ -1220,6 +1221,59 @@ static void detect_nopl(void)
 #endif
 }
 
+/*
+ * We parse cpu parameters early because fpu__init_system() is executed
+ * before parse_early_param().
+ */
+static void __init cpu_parse_early_param(void)
+{
+	char arg[128];
+	char *argptr = arg;
+	int arglen, res, bit;
+
+#ifdef CONFIG_X86_32
+	if (cmdline_find_option_bool(boot_command_line, "no387"))
+#ifdef CONFIG_MATH_EMULATION
+		setup_clear_cpu_cap(X86_FEATURE_FPU);
+#else
+		pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
+#endif
+
+	if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
+		setup_clear_cpu_cap(X86_FEATURE_FXSR);
+#endif
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsave"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+
+	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
+		setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+	arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
+	if (arglen <= 0)
+		return;
+
+	pr_info("Clearing CPUID bits:");
+	do {
+		res = get_option(&argptr, &bit);
+		if (res == 0 || res == 3)
+			break;
+
+		/* If the argument was too long, the last bit may be cut off */
+		if (res == 1 && arglen >= sizeof(arg))
+			break;
+
+		if (bit >= 0 && bit < NCAPINTS * 32) {
+			pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
+			setup_clear_cpu_cap(bit);
+		}
+	} while (res == 2);
+	pr_cont("\n");
+}
+
 /*
  * Do minimum CPU detection early.
  * Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -1255,6 +1309,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		get_cpu_cap(c);
 		get_cpu_address_sizes(c);
 		setup_force_cpu_cap(X86_FEATURE_CPUID);
+		cpu_parse_early_param();
 
 		if (this_cpu->c_early_init)
 			this_cpu->c_early_init(c);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index f8ff895aaf7e17..701f196d7c6866 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -5,7 +5,6 @@
 #include <asm/fpu/internal.h>
 #include <asm/tlbflush.h>
 #include <asm/setup.h>
-#include <asm/cmdline.h>
 
 #include <linux/sched.h>
 #include <linux/sched/task.h>
@@ -237,66 +236,12 @@ static void __init fpu__init_system_ctx_switch(void)
 	on_boot_cpu = 0;
 }
 
-/*
- * We parse fpu parameters early because fpu__init_system() is executed
- * before parse_early_param().
- */
-static void __init fpu__init_parse_early_param(void)
-{
-	char arg[128];
-	char *argptr = arg;
-	int arglen, res, bit;
-
-#ifdef CONFIG_X86_32
-	if (cmdline_find_option_bool(boot_command_line, "no387"))
-#ifdef CONFIG_MATH_EMULATION
-		setup_clear_cpu_cap(X86_FEATURE_FPU);
-#else
-		pr_err("Option 'no387' required CONFIG_MATH_EMULATION enabled.\n");
-#endif
-
-	if (cmdline_find_option_bool(boot_command_line, "nofxsr"))
-		setup_clear_cpu_cap(X86_FEATURE_FXSR);
-#endif
-
-	if (cmdline_find_option_bool(boot_command_line, "noxsave"))
-		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-
-	if (cmdline_find_option_bool(boot_command_line, "noxsaveopt"))
-		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-
-	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
-		setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-
-	arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
-	if (arglen <= 0)
-		return;
-
-	pr_info("Clearing CPUID bits:");
-	do {
-		res = get_option(&argptr, &bit);
-		if (res == 0 || res == 3)
-			break;
-
-		/* If the argument was too long, the last bit may be cut off */
-		if (res == 1 && arglen >= sizeof(arg))
-			break;
-
-		if (bit >= 0 && bit < NCAPINTS * 32) {
-			pr_cont(" " X86_CAP_FMT, x86_cap_flag(bit));
-			setup_clear_cpu_cap(bit);
-		}
-	} while (res == 2);
-	pr_cont("\n");
-}
-
 /*
  * Called on the boot CPU once per system bootup, to set up the initial
  * FPU state that is later cloned into all processes:
  */
 void __init fpu__init_system(struct cpuinfo_x86 *c)
 {
-	fpu__init_parse_early_param();
 	fpu__init_system_early_generic(c);
 
 	/*

From 900ffe39fec908e0aa26a30612e43ebc7140db79 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 19 Sep 2020 01:09:36 -0700
Subject: [PATCH 166/262] x86/entry: Fix typo in comments for
 syscall_enter_from_user_mode()

Just to help myself and others with finding the correct function names,
fix a typo for "usermode" vs "user_mode".

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200919080936.259819-1-keescook@chromium.org
---
 include/linux/entry-common.h | 2 +-
 kernel/entry/common.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index efebbffcd5cc97..3b6754d5a6043a 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -38,7 +38,7 @@
 #endif
 
 /*
- * TIF flags handled in syscall_enter_from_usermode()
+ * TIF flags handled in syscall_enter_from_user_mode()
  */
 #ifndef ARCH_SYSCALL_ENTER_WORK
 # define ARCH_SYSCALL_ENTER_WORK	(0)
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index fcae019158cad4..7c7b9d00338aef 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -183,7 +183,7 @@ static inline bool report_single_step(unsigned long ti_work)
 /*
  * If TIF_SYSCALL_EMU is set, then the only reason to report is when
  * TIF_SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP).  This syscall
- * instruction has been already reported in syscall_enter_from_usermode().
+ * instruction has been already reported in syscall_enter_from_user_mode().
  */
 #define SYSEMU_STEP	(_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)
 

From 4bd442e9a8388e8ec4ba7cf23a4774989d93b78e Mon Sep 17 00:00:00 2001
From: Qinglang Miao <miaoqinglang@huawei.com>
Date: Sat, 19 Sep 2020 09:22:52 +0800
Subject: [PATCH 167/262] RAS/CEC: Convert to DEFINE_SHOW_ATTRIBUTE()

Use the DEFINE_SHOW_ATTRIBUTE() macro and simplify the code.

Signed-off-by: Qinglang Miao <miaoqinglang@huawei.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200919012252.171437-1-miaoqinglang@huawei.com
---
 drivers/ras/cec.c | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c
index 6939aa5b3dc7fa..ddecf25b5dd407 100644
--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -435,7 +435,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(action_threshold_ops, u64_get, action_threshold_set, "%
 
 static const char * const bins[] = { "00", "01", "10", "11" };
 
-static int array_dump(struct seq_file *m, void *v)
+static int array_show(struct seq_file *m, void *v)
 {
 	struct ce_array *ca = &ce_arr;
 	int i;
@@ -467,18 +467,7 @@ static int array_dump(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int array_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, array_dump, NULL);
-}
-
-static const struct file_operations array_ops = {
-	.owner	 = THIS_MODULE,
-	.open	 = array_open,
-	.read	 = seq_read,
-	.llseek	 = seq_lseek,
-	.release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(array);
 
 static int __init create_debugfs_nodes(void)
 {
@@ -513,7 +502,7 @@ static int __init create_debugfs_nodes(void)
 		goto err;
 	}
 
-	array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_ops);
+	array = debugfs_create_file("array", S_IRUSR, d, NULL, &array_fops);
 	if (!array) {
 		pr_warn("Error creating array debugfs node!\n");
 		goto err;

From 50c5feeea0af99a4401fd54fd72bec1333a496ca Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Thu, 24 Sep 2020 08:48:04 +1000
Subject: [PATCH 168/262] ide/macide: Convert Mac IDE driver to platform driver

Add platform devices for the Mac IDE controller variants. Convert the
macide module into a platform driver to support two of those variants.
For the third, use a generic "pata_platform" driver instead.
This enables automatic loading of the appropriate module and begins
the process of replacing the driver with libata alternatives.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Tested-by: Stan Johnson <userm57@yahoo.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Joshua Thompson <funaho@jurai.org>
References: commit 5ed0794cde593 ("m68k/atari: Convert Falcon IDE drivers to platform drivers")
References: commit 7ad19a99ad431 ("ide: officially deprecated the legacy IDE driver")
Link: https://lore.kernel.org/r/edd106dad1bbea32500601c6071f37a9f02a8004.1600901284.git.fthain@telegraphics.com.au
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/configs/mac_defconfig   |  1 +
 arch/m68k/configs/multi_defconfig |  1 +
 arch/m68k/mac/config.c            | 41 +++++++++++++++++++
 drivers/ide/Kconfig               |  7 ++--
 drivers/ide/macide.c              | 66 ++++++++++++++++++++-----------
 5 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 522dcf624aa509..3cd76bfaee0386 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -317,6 +317,7 @@ CONFIG_DUMMY_IRQ=m
 CONFIG_IDE=y
 CONFIG_IDE_GD_ATAPI=y
 CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_PLATFORM=y
 CONFIG_BLK_DEV_MAC_IDE=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 2433409f4369d0..c3d6faa7894f12 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -346,6 +346,7 @@ CONFIG_DUMMY_IRQ=m
 CONFIG_IDE=y
 CONFIG_IDE_GD_ATAPI=y
 CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_PLATFORM=y
 CONFIG_BLK_DEV_GAYLE=y
 CONFIG_BLK_DEV_BUDDHA=y
 CONFIG_BLK_DEV_FALCON_IDE=y
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 5c9f3a2d653882..43fc29180cb588 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/vt_kern.h>
 #include <linux/platform_device.h>
+#include <linux/ata_platform.h>
 #include <linux/adb.h>
 #include <linux/cuda.h>
 #include <linux/pmu.h>
@@ -940,6 +941,26 @@ static const struct resource mac_scsi_ccl_rsrc[] __initconst = {
 	},
 };
 
+static const struct resource mac_ide_quadra_rsrc[] __initconst = {
+	DEFINE_RES_MEM(0x50F1A000, 0x104),
+	DEFINE_RES_IRQ(IRQ_NUBUS_F),
+};
+
+static const struct resource mac_ide_pb_rsrc[] __initconst = {
+	DEFINE_RES_MEM(0x50F1A000, 0x104),
+	DEFINE_RES_IRQ(IRQ_NUBUS_C),
+};
+
+static const struct resource mac_pata_baboon_rsrc[] __initconst = {
+	DEFINE_RES_MEM(0x50F1A000, 0x38),
+	DEFINE_RES_MEM(0x50F1A038, 0x04),
+	DEFINE_RES_IRQ(IRQ_BABOON_1),
+};
+
+static const struct pata_platform_info mac_pata_baboon_data __initconst = {
+	.ioport_shift = 2,
+};
+
 int __init mac_platform_init(void)
 {
 	phys_addr_t swim_base = 0;
@@ -1048,6 +1069,26 @@ int __init mac_platform_init(void)
 		break;
 	}
 
+	/*
+	 * IDE device
+	 */
+
+	switch (macintosh_config->ide_type) {
+	case MAC_IDE_QUADRA:
+		platform_device_register_simple("mac_ide", -1,
+			mac_ide_quadra_rsrc, ARRAY_SIZE(mac_ide_quadra_rsrc));
+		break;
+	case MAC_IDE_PB:
+		platform_device_register_simple("mac_ide", -1,
+			mac_ide_pb_rsrc, ARRAY_SIZE(mac_ide_pb_rsrc));
+		break;
+	case MAC_IDE_BABOON:
+		platform_device_register_resndata(NULL, "pata_platform", -1,
+			mac_pata_baboon_rsrc, ARRAY_SIZE(mac_pata_baboon_rsrc),
+			&mac_pata_baboon_data, sizeof(mac_pata_baboon_data));
+		break;
+	}
+
 	/*
 	 * Ethernet device
 	 */
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 973ed4b684cec2..19abf11c84c8a3 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -744,9 +744,10 @@ config BLK_DEV_MAC_IDE
 	depends on MAC
 	help
 	  This is the IDE driver for the on-board IDE interface on some m68k
-	  Macintosh models. It supports both the `Quadra style' (used in
-	  Quadra/ Centris 630 and Performa 588 models) and `Powerbook style'
-	  (used in the Powerbook 150 and 190 models) IDE interface.
+	  Macintosh models, namely Quadra/Centris 630, Performa 588 and
+	  Powerbook 150. The IDE interface on the Powerbook 190 is not
+	  supported by this driver and requires BLK_DEV_PLATFORM or
+	  PATA_PLATFORM.
 
 	  Say Y if you have such an Macintosh model and want to use IDE
 	  devices (hard disks, CD-ROM drives, etc.) that are connected to the
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
index adc5fe9daafc21..8d2bf73bc548d3 100644
--- a/drivers/ide/macide.c
+++ b/drivers/ide/macide.c
@@ -18,10 +18,11 @@
 #include <linux/delay.h>
 #include <linux/ide.h>
 #include <linux/module.h>
+#include <linux/platform_device.h>
 
 #include <asm/macintosh.h>
-#include <asm/macints.h>
-#include <asm/mac_baboon.h>
+
+#define DRV_NAME "mac_ide"
 
 #define IDE_BASE 0x50F1A000	/* Base address of IDE controller */
 
@@ -100,42 +101,61 @@ static const char *mac_ide_name[] =
  * Probe for a Macintosh IDE interface
  */
 
-static int __init macide_init(void)
+static int mac_ide_probe(struct platform_device *pdev)
 {
-	unsigned long base;
-	int irq;
+	struct resource *mem, *irq;
 	struct ide_hw hw, *hws[] = { &hw };
 	struct ide_port_info d = macide_port_info;
+	struct ide_host *host;
+	int rc;
 
 	if (!MACH_IS_MAC)
 		return -ENODEV;
 
-	switch (macintosh_config->ide_type) {
-	case MAC_IDE_QUADRA:
-		base = IDE_BASE;
-		irq = IRQ_NUBUS_F;
-		break;
-	case MAC_IDE_PB:
-		base = IDE_BASE;
-		irq = IRQ_NUBUS_C;
-		break;
-	case MAC_IDE_BABOON:
-		base = BABOON_BASE;
-		d.port_ops = NULL;
-		irq = IRQ_BABOON_1;
-		break;
-	default:
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem)
+		return -ENODEV;
+
+	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!irq)
 		return -ENODEV;
+
+	if (!devm_request_mem_region(&pdev->dev, mem->start,
+				     resource_size(mem), DRV_NAME)) {
+		dev_err(&pdev->dev, "resources busy\n");
+		return -EBUSY;
 	}
 
 	printk(KERN_INFO "ide: Macintosh %s IDE controller\n",
 			 mac_ide_name[macintosh_config->ide_type - 1]);
 
-	macide_setup_ports(&hw, base, irq);
+	macide_setup_ports(&hw, mem->start, irq->start);
 
-	return ide_host_add(&d, hws, 1, NULL);
+	rc = ide_host_add(&d, hws, 1, &host);
+	if (rc)
+		return rc;
+
+	platform_set_drvdata(pdev, host);
+	return 0;
 }
 
-module_init(macide_init);
+static int mac_ide_remove(struct platform_device *pdev)
+{
+	struct ide_host *host = platform_get_drvdata(pdev);
+
+	ide_host_remove(host);
+	return 0;
+}
+
+static struct platform_driver mac_ide_driver = {
+	.driver = {
+		.name = DRV_NAME,
+	},
+	.probe  = mac_ide_probe,
+	.remove = mac_ide_remove,
+};
+
+module_platform_driver(mac_ide_driver);
 
+MODULE_ALIAS("platform:" DRV_NAME);
 MODULE_LICENSE("GPL");

From f5be3a61fdb5dd11ef60173e2783ccf62685f892 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Tue, 22 Sep 2020 13:53:45 +0800
Subject: [PATCH 169/262] arm64: perf: Add support caps under sysfs

ARMv8.4-PMU introduces the PMMIR_EL1 registers and some new PMU events,
like STALL_SLOT etc, are related to it. Let's add a caps directory to
/sys/bus/event_source/devices/armv8_pmuv3_0/ and support slots from
PMMIR_EL1 registers in this entry. The user programs can get the slots
from sysfs directly.

/sys/bus/event_source/devices/armv8_pmuv3_0/caps/slots is exposed
under sysfs. Both ARMv8.4-PMU and STALL_SLOT event are implemented,
it returns the slots from PMMIR_EL1, otherwise it will return 0.

Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/1600754025-53535-1-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/perf_event.h |   3 +
 arch/arm64/include/asm/sysreg.h     |   2 +
 arch/arm64/kernel/perf_event.c      | 103 +++++++++++++++++++---------
 include/linux/perf/arm_pmu.h        |   3 +
 4 files changed, 78 insertions(+), 33 deletions(-)

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 2c2d7dbe8a0298..60731f602d3ef7 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -236,6 +236,9 @@
 #define ARMV8_PMU_USERENR_CR	(1 << 2) /* Cycle counter can be read at EL0 */
 #define ARMV8_PMU_USERENR_ER	(1 << 3) /* Event counter can be read at EL0 */
 
+/* PMMIR_EL1.SLOTS mask */
+#define ARMV8_PMU_SLOTS_MASK	0xff
+
 #ifdef CONFIG_PERF_EVENTS
 struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 554a7e8ecb0746..921773adff5eda 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -321,6 +321,8 @@
 #define SYS_PMINTENSET_EL1		sys_reg(3, 0, 9, 14, 1)
 #define SYS_PMINTENCLR_EL1		sys_reg(3, 0, 9, 14, 2)
 
+#define SYS_PMMIR_EL1			sys_reg(3, 0, 9, 14, 6)
+
 #define SYS_MAIR_EL1			sys_reg(3, 0, 10, 2, 0)
 #define SYS_AMAIR_EL1			sys_reg(3, 0, 10, 3, 0)
 
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 04d0ceb72a67f3..34a7cd3af6997c 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -305,6 +305,28 @@ static struct attribute_group armv8_pmuv3_format_attr_group = {
 	.attrs = armv8_pmuv3_format_attrs,
 };
 
+static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
+			  char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+	u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK;
+
+	return snprintf(page, PAGE_SIZE, "0x%08x\n", slots);
+}
+
+static DEVICE_ATTR_RO(slots);
+
+static struct attribute *armv8_pmuv3_caps_attrs[] = {
+	&dev_attr_slots.attr,
+	NULL,
+};
+
+static struct attribute_group armv8_pmuv3_caps_attr_group = {
+	.name = "caps",
+	.attrs = armv8_pmuv3_caps_attrs,
+};
+
 /*
  * Perf Events' indices
  */
@@ -984,6 +1006,12 @@ static void __armv8pmu_probe_pmu(void *info)
 
 	bitmap_from_arr32(cpu_pmu->pmceid_ext_bitmap,
 			     pmceid, ARMV8_PMUV3_MAX_COMMON_EVENTS);
+
+	/* store PMMIR_EL1 register for sysfs */
+	if (pmuver >= ID_AA64DFR0_PMUVER_8_4 && (pmceid_raw[1] & BIT(31)))
+		cpu_pmu->reg_pmmir = read_cpuid(PMMIR_EL1);
+	else
+		cpu_pmu->reg_pmmir = 0;
 }
 
 static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
@@ -1006,7 +1034,8 @@ static int armv8pmu_probe_pmu(struct arm_pmu *cpu_pmu)
 static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
 			  int (*map_event)(struct perf_event *event),
 			  const struct attribute_group *events,
-			  const struct attribute_group *format)
+			  const struct attribute_group *format,
+			  const struct attribute_group *caps)
 {
 	int ret = armv8pmu_probe_pmu(cpu_pmu);
 	if (ret)
@@ -1031,104 +1060,112 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
 			events : &armv8_pmuv3_events_attr_group;
 	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = format ?
 			format : &armv8_pmuv3_format_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_CAPS] = caps ?
+			caps : &armv8_pmuv3_caps_attr_group;
 
 	return 0;
 }
 
+static int armv8_pmu_init_nogroups(struct arm_pmu *cpu_pmu, char *name,
+				   int (*map_event)(struct perf_event *event))
+{
+	return armv8_pmu_init(cpu_pmu, name, map_event, NULL, NULL, NULL);
+}
+
 static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_pmuv3",
-			      armv8_pmuv3_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_pmuv3",
+				       armv8_pmuv3_map_event);
 }
 
 static int armv8_a34_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a34",
-			      armv8_pmuv3_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a34",
+				       armv8_pmuv3_map_event);
 }
 
 static int armv8_a35_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a35",
-			      armv8_a53_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a35",
+				       armv8_a53_map_event);
 }
 
 static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a53",
-			      armv8_a53_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a53",
+				       armv8_a53_map_event);
 }
 
 static int armv8_a55_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a55",
-			      armv8_pmuv3_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a55",
+				       armv8_pmuv3_map_event);
 }
 
 static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a57",
-			      armv8_a57_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a57",
+				       armv8_a57_map_event);
 }
 
 static int armv8_a65_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a65",
-			      armv8_pmuv3_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a65",
+				       armv8_pmuv3_map_event);
 }
 
 static int armv8_a72_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a72",
-			      armv8_a57_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a72",
+				       armv8_a57_map_event);
 }
 
 static int armv8_a73_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a73",
-			      armv8_a73_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a73",
+				       armv8_a73_map_event);
 }
 
 static int armv8_a75_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a75",
-			      armv8_pmuv3_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a75",
+				       armv8_pmuv3_map_event);
 }
 
 static int armv8_a76_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a76",
-			      armv8_pmuv3_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a76",
+				       armv8_pmuv3_map_event);
 }
 
 static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cortex_a77",
-			      armv8_pmuv3_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a77",
+				       armv8_pmuv3_map_event);
 }
 
 static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_neoverse_e1",
-			      armv8_pmuv3_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1",
+				       armv8_pmuv3_map_event);
 }
 
 static int armv8_n1_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_neoverse_n1",
-			      armv8_pmuv3_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_n1",
+				       armv8_pmuv3_map_event);
 }
 
 static int armv8_thunder_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_cavium_thunder",
-			      armv8_thunder_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cavium_thunder",
+				       armv8_thunder_map_event);
 }
 
 static int armv8_vulcan_pmu_init(struct arm_pmu *cpu_pmu)
 {
-	return armv8_pmu_init(cpu_pmu, "armv8_brcm_vulcan",
-			      armv8_vulcan_map_event, NULL, NULL);
+	return armv8_pmu_init_nogroups(cpu_pmu, "armv8_brcm_vulcan",
+				       armv8_vulcan_map_event);
 }
 
 static const struct of_device_id armv8_pmu_of_device_ids[] = {
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 5b616dde9a4c2c..505480217cf1a9 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -73,6 +73,7 @@ enum armpmu_attr_groups {
 	ARMPMU_ATTR_GROUP_COMMON,
 	ARMPMU_ATTR_GROUP_EVENTS,
 	ARMPMU_ATTR_GROUP_FORMATS,
+	ARMPMU_ATTR_GROUP_CAPS,
 	ARMPMU_NR_ATTR_GROUPS
 };
 
@@ -109,6 +110,8 @@ struct arm_pmu {
 	struct notifier_block	cpu_pm_nb;
 	/* the attr_groups array must be NULL-terminated */
 	const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1];
+	/* store the PMMIR_EL1 to expose slots */
+	u64		reg_pmmir;
 
 	/* Only to be used by ACPI probing code */
 	unsigned long acpi_cpuid;

From c8fdbbfa981a7bc64acec234620788c97d1f6a88 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 18 Sep 2020 19:24:09 +0100
Subject: [PATCH 170/262] perf: Add Arm CMN-600 DT binding

Document the requirements for the CMN-600 DT binding. The internal
topology is almost entirely discoverable by walking a tree of ID
registers, but sadly both the starting point for that walk and the
exact format of those registers are configuration-dependent and not
discoverable from some sane fixed location. Oh well.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../devicetree/bindings/perf/arm,cmn.yaml     | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/perf/arm,cmn.yaml

diff --git a/Documentation/devicetree/bindings/perf/arm,cmn.yaml b/Documentation/devicetree/bindings/perf/arm,cmn.yaml
new file mode 100644
index 00000000000000..e4fcc0de25e2a9
--- /dev/null
+++ b/Documentation/devicetree/bindings/perf/arm,cmn.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2020 Arm Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/perf/arm,cmn.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Arm CMN (Coherent Mesh Network) Performance Monitors
+
+maintainers:
+  - Robin Murphy <robin.murphy@arm.com>
+
+properties:
+  compatible:
+    const: arm,cmn-600
+
+  reg:
+    items:
+      - description: Physical address of the base (PERIPHBASE) and
+          size (up to 64MB) of the configuration address space.
+
+  interrupts:
+    minItems: 1
+    maxItems: 4
+    items:
+      - description: Overflow interrupt for DTC0
+      - description: Overflow interrupt for DTC1
+      - description: Overflow interrupt for DTC2
+      - description: Overflow interrupt for DTC3
+    description: One interrupt for each DTC domain implemented must
+      be specified, in order. DTC0 is always present.
+
+  arm,root-node:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Offset from PERIPHBASE of the configuration
+      discovery node (see TRM definition of ROOTNODEBASE).
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - arm,root-node
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    pmu@50000000 {
+        compatible = "arm,cmn-600";
+        reg = <0x50000000 0x4000000>;
+        /* 4x2 mesh with one DTC, and CFG node at 0,1,1,0 */
+        interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
+        arm,root-node = <0x104000>;
+    };
+...

From 0ba64770a2f2e5a104bf835e133d78d3f82287ad Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 18 Sep 2020 14:28:38 +0100
Subject: [PATCH 171/262] perf: Add Arm CMN-600 PMU driver

Initial driver for PMU event counting on the Arm CMN-600 interconnect.
CMN sports an obnoxiously complex distributed PMU system as part of
its debug and trace features, which can do all manner of things like
sampling, cross-triggering and generating CoreSight trace. This driver
covers the PMU functionality, plus the relevant aspects of watchpoints
for simply counting matching flits.

Tested-by: Tsahi Zidenberg <tsahee@amazon.com>
Tested-by: Tuan Phan <tuanphan@os.amperecomputing.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/admin-guide/perf/arm-cmn.rst |   65 +
 Documentation/admin-guide/perf/index.rst   |    1 +
 drivers/perf/Kconfig                       |    7 +
 drivers/perf/Makefile                      |    1 +
 drivers/perf/arm-cmn.c                     | 1641 ++++++++++++++++++++
 5 files changed, 1715 insertions(+)
 create mode 100644 Documentation/admin-guide/perf/arm-cmn.rst
 create mode 100644 drivers/perf/arm-cmn.c

diff --git a/Documentation/admin-guide/perf/arm-cmn.rst b/Documentation/admin-guide/perf/arm-cmn.rst
new file mode 100644
index 00000000000000..0e48093460140b
--- /dev/null
+++ b/Documentation/admin-guide/perf/arm-cmn.rst
@@ -0,0 +1,65 @@
+=============================
+Arm Coherent Mesh Network PMU
+=============================
+
+CMN-600 is a configurable mesh interconnect consisting of a rectangular
+grid of crosspoints (XPs), with each crosspoint supporting up to two
+device ports to which various AMBA CHI agents are attached.
+
+CMN implements a distributed PMU design as part of its debug and trace
+functionality. This consists of a local monitor (DTM) at every XP, which
+counts up to 4 event signals from the connected device nodes and/or the
+XP itself. Overflow from these local counters is accumulated in up to 8
+global counters implemented by the main controller (DTC), which provides
+overall PMU control and interrupts for global counter overflow.
+
+PMU events
+----------
+
+The PMU driver registers a single PMU device for the whole interconnect,
+see /sys/bus/event_source/devices/arm_cmn. Multi-chip systems may link
+more than one CMN together via external CCIX links - in this situation,
+each mesh counts its own events entirely independently, and additional
+PMU devices will be named arm_cmn_{1..n}.
+
+Most events are specified in a format based directly on the TRM
+definitions - "type" selects the respective node type, and "eventid" the
+event number. Some events require an additional occupancy ID, which is
+specified by "occupid".
+
+* Since RN-D nodes do not have any distinct events from RN-I nodes, they
+  are treated as the same type (0xa), and the common event templates are
+  named "rnid_*".
+
+* The cycle counter is treated as a synthetic event belonging to the DTC
+  node ("type" == 0x3, "eventid" is ignored).
+
+* XP events also encode the port and channel in the "eventid" field, to
+  match the underlying pmu_event0_id encoding for the pmu_event_sel
+  register. The event templates are named with prefixes to cover all
+  permutations.
+
+By default each event provides an aggregate count over all nodes of the
+given type. To target a specific node, "bynodeid" must be set to 1 and
+"nodeid" to the appropriate value derived from the CMN configuration
+(as defined in the "Node ID Mapping" section of the TRM).
+
+Watchpoints
+-----------
+
+The PMU can also count watchpoint events to monitor specific flit
+traffic. Watchpoints are treated as a synthetic event type, and like PMU
+events can be global or targeted with a particular XP's "nodeid" value.
+Since the watchpoint direction is otherwise implicit in the underlying
+register selection, separate events are provided for flit uploads and
+downloads.
+
+The flit match value and mask are passed in config1 and config2 ("val"
+and "mask" respectively). "wp_dev_sel", "wp_chn_sel", "wp_grp" and
+"wp_exclusive" are specified per the TRM definitions for dtm_wp_config0.
+Where a watchpoint needs to match fields from both match groups on the
+REQ or SNP channel, it can be specified as two events - one for each
+group - with the same nonzero "combine" value. The count for such a
+pair of combined events will be attributed to the primary match.
+Watchpoint events with a "combine" value of 0 are considered independent
+and will count individually.
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 47c99f40cc160b..5a8f2529a0331d 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -12,6 +12,7 @@ Performance monitor support
    qcom_l2_pmu
    qcom_l3_pmu
    arm-ccn
+   arm-cmn
    xgene-pmu
    arm_dsu_pmu
    thunderx2-pmu
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 7305d57d189047..130327ff0b0ec8 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -41,6 +41,13 @@ config ARM_CCN
 	  PMU (perf) driver supporting the ARM CCN (Cache Coherent Network)
 	  interconnect.
 
+config ARM_CMN
+	tristate "Arm CMN-600 PMU support"
+	depends on ARM64 || (COMPILE_TEST && 64BIT)
+	help
+	  Support for PMU events monitoring on the Arm CMN-600 Coherent Mesh
+	  Network interconnect.
+
 config ARM_PMU
 	depends on ARM || ARM64
 	bool "ARM PMU framework"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 2ebb4de1781517..5365fd56f88f35 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_ARM_CCI_PMU) += arm-cci.o
 obj-$(CONFIG_ARM_CCN) += arm-ccn.o
+obj-$(CONFIG_ARM_CMN) += arm-cmn.o
 obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
 obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
 obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
new file mode 100644
index 00000000000000..e824b5b83ea25f
--- /dev/null
+++ b/drivers/perf/arm-cmn.c
@@ -0,0 +1,1641 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2016-2020 Arm Limited
+// CMN-600 Coherent Mesh Network PMU driver
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+
+/* Common register stuff */
+#define CMN_NODE_INFO			0x0000
+#define CMN_NI_NODE_TYPE		GENMASK_ULL(15, 0)
+#define CMN_NI_NODE_ID			GENMASK_ULL(31, 16)
+#define CMN_NI_LOGICAL_ID		GENMASK_ULL(47, 32)
+
+#define CMN_NODEID_DEVID(reg)		((reg) & 3)
+#define CMN_NODEID_PID(reg)		(((reg) >> 2) & 1)
+#define CMN_NODEID_X(reg, bits)		((reg) >> (3 + (bits)))
+#define CMN_NODEID_Y(reg, bits)		(((reg) >> 3) & ((1U << (bits)) - 1))
+
+#define CMN_CHILD_INFO			0x0080
+#define CMN_CI_CHILD_COUNT		GENMASK_ULL(15, 0)
+#define CMN_CI_CHILD_PTR_OFFSET		GENMASK_ULL(31, 16)
+
+#define CMN_CHILD_NODE_ADDR		GENMASK(27,0)
+#define CMN_CHILD_NODE_EXTERNAL		BIT(31)
+
+#define CMN_ADDR_NODE_PTR		GENMASK(27, 14)
+
+#define CMN_NODE_PTR_DEVID(ptr)		(((ptr) >> 2) & 3)
+#define CMN_NODE_PTR_PID(ptr)		((ptr) & 1)
+#define CMN_NODE_PTR_X(ptr, bits)	((ptr) >> (6 + (bits)))
+#define CMN_NODE_PTR_Y(ptr, bits)	(((ptr) >> 6) & ((1U << (bits)) - 1))
+
+#define CMN_MAX_XPS			(8 * 8)
+
+/* The CFG node has one other useful purpose */
+#define CMN_CFGM_PERIPH_ID_2		0x0010
+#define CMN_CFGM_PID2_REVISION		GENMASK(7, 4)
+
+/* PMU registers occupy the 3rd 4KB page of each node's 16KB space */
+#define CMN_PMU_OFFSET			0x2000
+
+/* For most nodes, this is all there is */
+#define CMN_PMU_EVENT_SEL		0x000
+#define CMN_PMU_EVENTn_ID_SHIFT(n)	((n) * 8)
+
+/* DTMs live in the PMU space of XP registers */
+#define CMN_DTM_WPn(n)			(0x1A0 + (n) * 0x18)
+#define CMN_DTM_WPn_CONFIG(n)		(CMN_DTM_WPn(n) + 0x00)
+#define CMN_DTM_WPn_CONFIG_WP_COMBINE	BIT(6)
+#define CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE	BIT(5)
+#define CMN_DTM_WPn_CONFIG_WP_GRP	BIT(4)
+#define CMN_DTM_WPn_CONFIG_WP_CHN_SEL	GENMASK_ULL(3, 1)
+#define CMN_DTM_WPn_CONFIG_WP_DEV_SEL	BIT(0)
+#define CMN_DTM_WPn_VAL(n)		(CMN_DTM_WPn(n) + 0x08)
+#define CMN_DTM_WPn_MASK(n)		(CMN_DTM_WPn(n) + 0x10)
+
+#define CMN_DTM_PMU_CONFIG		0x210
+#define CMN__PMEVCNT0_INPUT_SEL		GENMASK_ULL(37, 32)
+#define CMN__PMEVCNT0_INPUT_SEL_WP	0x00
+#define CMN__PMEVCNT0_INPUT_SEL_XP	0x04
+#define CMN__PMEVCNT0_INPUT_SEL_DEV	0x10
+#define CMN__PMEVCNT0_GLOBAL_NUM	GENMASK_ULL(18, 16)
+#define CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(n)	((n) * 4)
+#define CMN__PMEVCNT_PAIRED(n)		BIT(4 + (n))
+#define CMN__PMEVCNT23_COMBINED		BIT(2)
+#define CMN__PMEVCNT01_COMBINED		BIT(1)
+#define CMN_DTM_PMU_CONFIG_PMU_EN	BIT(0)
+
+#define CMN_DTM_PMEVCNT			0x220
+
+#define CMN_DTM_PMEVCNTSR		0x240
+
+#define CMN_DTM_NUM_COUNTERS		4
+
+/* The DTC node is where the magic happens */
+#define CMN_DT_DTC_CTL			0x0a00
+#define CMN_DT_DTC_CTL_DT_EN		BIT(0)
+
+/* DTC counters are paired in 64-bit registers on a 16-byte stride. Yuck */
+#define _CMN_DT_CNT_REG(n)		((((n) / 2) * 4 + (n) % 2) * 4)
+#define CMN_DT_PMEVCNT(n)		(CMN_PMU_OFFSET + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTR			(CMN_PMU_OFFSET + 0x40)
+
+#define CMN_DT_PMEVCNTSR(n)		(CMN_PMU_OFFSET + 0x50 + _CMN_DT_CNT_REG(n))
+#define CMN_DT_PMCCNTRSR		(CMN_PMU_OFFSET + 0x90)
+
+#define CMN_DT_PMCR			(CMN_PMU_OFFSET + 0x100)
+#define CMN_DT_PMCR_PMU_EN		BIT(0)
+#define CMN_DT_PMCR_CNTR_RST		BIT(5)
+#define CMN_DT_PMCR_OVFL_INTR_EN	BIT(6)
+
+#define CMN_DT_PMOVSR			(CMN_PMU_OFFSET + 0x118)
+#define CMN_DT_PMOVSR_CLR		(CMN_PMU_OFFSET + 0x120)
+
+#define CMN_DT_PMSSR			(CMN_PMU_OFFSET + 0x128)
+#define CMN_DT_PMSSR_SS_STATUS(n)	BIT(n)
+
+#define CMN_DT_PMSRR			(CMN_PMU_OFFSET + 0x130)
+#define CMN_DT_PMSRR_SS_REQ		BIT(0)
+
+#define CMN_DT_NUM_COUNTERS		8
+#define CMN_MAX_DTCS			4
+
+/*
+ * Even in the worst case a DTC counter can't wrap in fewer than 2^42 cycles,
+ * so throwing away one bit to make overflow handling easy is no big deal.
+ */
+#define CMN_COUNTER_INIT		0x80000000
+/* Similarly for the 40-bit cycle counter */
+#define CMN_CC_INIT			0x8000000000ULL
+
+
+/* Event attributes */
+#define CMN_CONFIG_TYPE			GENMASK(15, 0)
+#define CMN_CONFIG_EVENTID		GENMASK(23, 16)
+#define CMN_CONFIG_OCCUPID		GENMASK(27, 24)
+#define CMN_CONFIG_BYNODEID		BIT(31)
+#define CMN_CONFIG_NODEID		GENMASK(47, 32)
+
+#define CMN_EVENT_TYPE(event)		FIELD_GET(CMN_CONFIG_TYPE, (event)->attr.config)
+#define CMN_EVENT_EVENTID(event)	FIELD_GET(CMN_CONFIG_EVENTID, (event)->attr.config)
+#define CMN_EVENT_OCCUPID(event)	FIELD_GET(CMN_CONFIG_OCCUPID, (event)->attr.config)
+#define CMN_EVENT_BYNODEID(event)	FIELD_GET(CMN_CONFIG_BYNODEID, (event)->attr.config)
+#define CMN_EVENT_NODEID(event)		FIELD_GET(CMN_CONFIG_NODEID, (event)->attr.config)
+
+#define CMN_CONFIG_WP_COMBINE		GENMASK(27, 24)
+#define CMN_CONFIG_WP_DEV_SEL		BIT(48)
+#define CMN_CONFIG_WP_CHN_SEL		GENMASK(50, 49)
+#define CMN_CONFIG_WP_GRP		BIT(52)
+#define CMN_CONFIG_WP_EXCLUSIVE		BIT(53)
+#define CMN_CONFIG1_WP_VAL		GENMASK(63, 0)
+#define CMN_CONFIG2_WP_MASK		GENMASK(63, 0)
+
+#define CMN_EVENT_WP_COMBINE(event)	FIELD_GET(CMN_CONFIG_WP_COMBINE, (event)->attr.config)
+#define CMN_EVENT_WP_DEV_SEL(event)	FIELD_GET(CMN_CONFIG_WP_DEV_SEL, (event)->attr.config)
+#define CMN_EVENT_WP_CHN_SEL(event)	FIELD_GET(CMN_CONFIG_WP_CHN_SEL, (event)->attr.config)
+#define CMN_EVENT_WP_GRP(event)		FIELD_GET(CMN_CONFIG_WP_GRP, (event)->attr.config)
+#define CMN_EVENT_WP_EXCLUSIVE(event)	FIELD_GET(CMN_CONFIG_WP_EXCLUSIVE, (event)->attr.config)
+#define CMN_EVENT_WP_VAL(event)		FIELD_GET(CMN_CONFIG1_WP_VAL, (event)->attr.config1)
+#define CMN_EVENT_WP_MASK(event)	FIELD_GET(CMN_CONFIG2_WP_MASK, (event)->attr.config2)
+
+/* Made-up event IDs for watchpoint direction */
+#define CMN_WP_UP			0
+#define CMN_WP_DOWN			2
+
+
+/* r0px probably don't exist in silicon, thankfully */
+enum cmn_revision {
+	CMN600_R1P0,
+	CMN600_R1P1,
+	CMN600_R1P2,
+	CMN600_R1P3,
+	CMN600_R2P0,
+	CMN600_R3P0,
+};
+
+enum cmn_node_type {
+	CMN_TYPE_INVALID,
+	CMN_TYPE_DVM,
+	CMN_TYPE_CFG,
+	CMN_TYPE_DTC,
+	CMN_TYPE_HNI,
+	CMN_TYPE_HNF,
+	CMN_TYPE_XP,
+	CMN_TYPE_SBSX,
+	CMN_TYPE_RNI = 0xa,
+	CMN_TYPE_RND = 0xd,
+	CMN_TYPE_RNSAM = 0xf,
+	CMN_TYPE_CXRA = 0x100,
+	CMN_TYPE_CXHA = 0x101,
+	CMN_TYPE_CXLA = 0x102,
+	/* Not a real node type */
+	CMN_TYPE_WP = 0x7770
+};
+
+struct arm_cmn_node {
+	void __iomem *pmu_base;
+	u16 id, logid;
+	enum cmn_node_type type;
+
+	union {
+		/* Device node */
+		struct {
+			int to_xp;
+			/* DN/HN-F/CXHA */
+			unsigned int occupid_val;
+			unsigned int occupid_count;
+		};
+		/* XP */
+		struct {
+			int dtc;
+			u32 pmu_config_low;
+			union {
+				u8 input_sel[4];
+				__le32 pmu_config_high;
+			};
+			s8 wp_event[4];
+		};
+	};
+
+	union {
+		u8 event[4];
+		__le32 event_sel;
+	};
+};
+
+struct arm_cmn_dtc {
+	void __iomem *base;
+	unsigned int irq;
+	int irq_friend;
+	bool cc_active;
+
+	struct perf_event *counters[CMN_DT_NUM_COUNTERS];
+	struct perf_event *cycles;
+};
+
+#define CMN_STATE_DISABLED	BIT(0)
+#define CMN_STATE_TXN		BIT(1)
+
+struct arm_cmn {
+	struct device *dev;
+	void __iomem *base;
+
+	enum cmn_revision rev;
+	u8 mesh_x;
+	u8 mesh_y;
+	u16 num_xps;
+	u16 num_dns;
+	struct arm_cmn_node *xps;
+	struct arm_cmn_node *dns;
+
+	struct arm_cmn_dtc *dtc;
+	unsigned int num_dtcs;
+
+	int cpu;
+	struct hlist_node cpuhp_node;
+
+	unsigned int state;
+	struct pmu pmu;
+};
+
+#define to_cmn(p)	container_of(p, struct arm_cmn, pmu)
+
+static int arm_cmn_hp_state;
+
+struct arm_cmn_hw_event {
+	struct arm_cmn_node *dn;
+	u64 dtm_idx[2];
+	unsigned int dtc_idx;
+	u8 dtcs_used;
+	u8 num_dns;
+};
+
+#define for_each_hw_dn(hw, dn, i) \
+	for (i = 0, dn = hw->dn; i < hw->num_dns; i++, dn++)
+
+static struct arm_cmn_hw_event *to_cmn_hw(struct perf_event *event)
+{
+	BUILD_BUG_ON(sizeof(struct arm_cmn_hw_event) > offsetof(struct hw_perf_event, target));
+	return (struct arm_cmn_hw_event *)&event->hw;
+}
+
+static void arm_cmn_set_index(u64 x[], unsigned int pos, unsigned int val)
+{
+	x[pos / 32] |= (u64)val << ((pos % 32) * 2);
+}
+
+static unsigned int arm_cmn_get_index(u64 x[], unsigned int pos)
+{
+	return (x[pos / 32] >> ((pos % 32) * 2)) & 3;
+}
+
+struct arm_cmn_event_attr {
+	struct device_attribute attr;
+	enum cmn_node_type type;
+	u8 eventid;
+	u8 occupid;
+};
+
+struct arm_cmn_format_attr {
+	struct device_attribute attr;
+	u64 field;
+	int config;
+};
+
+static int arm_cmn_xyidbits(const struct arm_cmn *cmn)
+{
+	return cmn->mesh_x > 4 || cmn->mesh_y > 4 ? 3 : 2;
+}
+
+static void arm_cmn_init_node_to_xp(const struct arm_cmn *cmn,
+				    struct arm_cmn_node *dn)
+{
+	int bits = arm_cmn_xyidbits(cmn);
+	int x = CMN_NODEID_X(dn->id, bits);
+	int y = CMN_NODEID_Y(dn->id, bits);
+	int xp_idx = cmn->mesh_x * y + x;
+
+	dn->to_xp = (cmn->xps + xp_idx) - dn;
+}
+
+static struct arm_cmn_node *arm_cmn_node_to_xp(struct arm_cmn_node *dn)
+{
+	return dn->type == CMN_TYPE_XP ? dn : dn + dn->to_xp;
+}
+
+static struct arm_cmn_node *arm_cmn_node(const struct arm_cmn *cmn,
+					 enum cmn_node_type type)
+{
+	int i;
+
+	for (i = 0; i < cmn->num_dns; i++)
+		if (cmn->dns[i].type == type)
+			return &cmn->dns[i];
+	return NULL;
+}
+
+#define CMN_EVENT_ATTR(_name, _type, _eventid, _occupid)		\
+	(&((struct arm_cmn_event_attr[]) {{				\
+		.attr = __ATTR(_name, 0444, arm_cmn_event_show, NULL),	\
+		.type = _type,						\
+		.eventid = _eventid,					\
+		.occupid = _occupid,					\
+	}})[0].attr.attr)
+
+static bool arm_cmn_is_occup_event(enum cmn_node_type type, unsigned int id)
+{
+	return (type == CMN_TYPE_DVM && id == 0x05) ||
+	       (type == CMN_TYPE_HNF && id == 0x0f);
+}
+
+static ssize_t arm_cmn_event_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct arm_cmn_event_attr *eattr;
+
+	eattr = container_of(attr, typeof(*eattr), attr);
+
+	if (eattr->type == CMN_TYPE_DTC)
+		return snprintf(buf, PAGE_SIZE, "type=0x%x\n", eattr->type);
+
+	if (eattr->type == CMN_TYPE_WP)
+		return snprintf(buf, PAGE_SIZE,
+				"type=0x%x,eventid=0x%x,wp_dev_sel=?,wp_chn_sel=?,wp_grp=?,wp_val=?,wp_mask=?\n",
+				eattr->type, eattr->eventid);
+
+	if (arm_cmn_is_occup_event(eattr->type, eattr->eventid))
+		return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x,occupid=0x%x\n",
+				eattr->type, eattr->eventid, eattr->occupid);
+
+	return snprintf(buf, PAGE_SIZE, "type=0x%x,eventid=0x%x\n",
+			eattr->type, eattr->eventid);
+}
+
+static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
+					     struct attribute *attr,
+					     int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+	struct arm_cmn_event_attr *eattr;
+	enum cmn_node_type type;
+
+	eattr = container_of(attr, typeof(*eattr), attr.attr);
+	type = eattr->type;
+
+	/* Watchpoints aren't nodes */
+	if (type == CMN_TYPE_WP)
+		type = CMN_TYPE_XP;
+
+	/* Revision-specific differences */
+	if (cmn->rev < CMN600_R1P2) {
+		if (type == CMN_TYPE_HNF && eattr->eventid == 0x1b)
+			return 0;
+	}
+
+	if (!arm_cmn_node(cmn, type))
+		return 0;
+
+	return attr->mode;
+}
+
+#define _CMN_EVENT_DVM(_name, _event, _occup)			\
+	CMN_EVENT_ATTR(dn_##_name, CMN_TYPE_DVM, _event, _occup)
+#define CMN_EVENT_DTC(_name)					\
+	CMN_EVENT_ATTR(dtc_##_name, CMN_TYPE_DTC, 0, 0)
+#define _CMN_EVENT_HNF(_name, _event, _occup)			\
+	CMN_EVENT_ATTR(hnf_##_name, CMN_TYPE_HNF, _event, _occup)
+#define CMN_EVENT_HNI(_name, _event)				\
+	CMN_EVENT_ATTR(hni_##_name, CMN_TYPE_HNI, _event, 0)
+#define __CMN_EVENT_XP(_name, _event)				\
+	CMN_EVENT_ATTR(mxp_##_name, CMN_TYPE_XP, _event, 0)
+#define CMN_EVENT_SBSX(_name, _event)				\
+	CMN_EVENT_ATTR(sbsx_##_name, CMN_TYPE_SBSX, _event, 0)
+#define CMN_EVENT_RNID(_name, _event)				\
+	CMN_EVENT_ATTR(rnid_##_name, CMN_TYPE_RNI, _event, 0)
+
+#define CMN_EVENT_DVM(_name, _event)				\
+	_CMN_EVENT_DVM(_name, _event, 0)
+#define CMN_EVENT_HNF(_name, _event)				\
+	_CMN_EVENT_HNF(_name, _event, 0)
+#define _CMN_EVENT_XP(_name, _event)				\
+	__CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)),		\
+	__CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)),		\
+	__CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)),		\
+	__CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)),		\
+	__CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)),	\
+	__CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2))
+
+/* Good thing there are only 3 fundamental XP events... */
+#define CMN_EVENT_XP(_name, _event)				\
+	_CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)),	\
+	_CMN_EVENT_XP(rsp_##_name, (_event) | (1 << 5)),	\
+	_CMN_EVENT_XP(snp_##_name, (_event) | (2 << 5)),	\
+	_CMN_EVENT_XP(dat_##_name, (_event) | (3 << 5))
+
+
+static struct attribute *arm_cmn_event_attrs[] = {
+	CMN_EVENT_DTC(cycles),
+
+	/*
+	 * DVM node events conflict with HN-I events in the equivalent PMU
+	 * slot, but our lazy short-cut of using the DTM counter index for
+	 * the PMU index as well happens to avoid that by construction.
+	 */
+	CMN_EVENT_DVM(rxreq_dvmop,	0x01),
+	CMN_EVENT_DVM(rxreq_dvmsync,	0x02),
+	CMN_EVENT_DVM(rxreq_dvmop_vmid_filtered, 0x03),
+	CMN_EVENT_DVM(rxreq_retried,	0x04),
+	_CMN_EVENT_DVM(rxreq_trk_occupancy_all, 0x05, 0),
+	_CMN_EVENT_DVM(rxreq_trk_occupancy_dvmop, 0x05, 1),
+	_CMN_EVENT_DVM(rxreq_trk_occupancy_dvmsync, 0x05, 2),
+
+	CMN_EVENT_HNF(cache_miss,	0x01),
+	CMN_EVENT_HNF(slc_sf_cache_access, 0x02),
+	CMN_EVENT_HNF(cache_fill,	0x03),
+	CMN_EVENT_HNF(pocq_retry,	0x04),
+	CMN_EVENT_HNF(pocq_reqs_recvd,	0x05),
+	CMN_EVENT_HNF(sf_hit,		0x06),
+	CMN_EVENT_HNF(sf_evictions,	0x07),
+	CMN_EVENT_HNF(dir_snoops_sent,	0x08),
+	CMN_EVENT_HNF(brd_snoops_sent,	0x09),
+	CMN_EVENT_HNF(slc_eviction,	0x0a),
+	CMN_EVENT_HNF(slc_fill_invalid_way, 0x0b),
+	CMN_EVENT_HNF(mc_retries,	0x0c),
+	CMN_EVENT_HNF(mc_reqs,		0x0d),
+	CMN_EVENT_HNF(qos_hh_retry,	0x0e),
+	_CMN_EVENT_HNF(qos_pocq_occupancy_all, 0x0f, 0),
+	_CMN_EVENT_HNF(qos_pocq_occupancy_read, 0x0f, 1),
+	_CMN_EVENT_HNF(qos_pocq_occupancy_write, 0x0f, 2),
+	_CMN_EVENT_HNF(qos_pocq_occupancy_atomic, 0x0f, 3),
+	_CMN_EVENT_HNF(qos_pocq_occupancy_stash, 0x0f, 4),
+	CMN_EVENT_HNF(pocq_addrhaz,	0x10),
+	CMN_EVENT_HNF(pocq_atomic_addrhaz, 0x11),
+	CMN_EVENT_HNF(ld_st_swp_adq_full, 0x12),
+	CMN_EVENT_HNF(cmp_adq_full,	0x13),
+	CMN_EVENT_HNF(txdat_stall,	0x14),
+	CMN_EVENT_HNF(txrsp_stall,	0x15),
+	CMN_EVENT_HNF(seq_full,		0x16),
+	CMN_EVENT_HNF(seq_hit,		0x17),
+	CMN_EVENT_HNF(snp_sent,		0x18),
+	CMN_EVENT_HNF(sfbi_dir_snp_sent, 0x19),
+	CMN_EVENT_HNF(sfbi_brd_snp_sent, 0x1a),
+	CMN_EVENT_HNF(snp_sent_untrk,	0x1b),
+	CMN_EVENT_HNF(intv_dirty,	0x1c),
+	CMN_EVENT_HNF(stash_snp_sent,	0x1d),
+	CMN_EVENT_HNF(stash_data_pull,	0x1e),
+	CMN_EVENT_HNF(snp_fwded,	0x1f),
+
+	CMN_EVENT_HNI(rrt_rd_occ_cnt_ovfl, 0x20),
+	CMN_EVENT_HNI(rrt_wr_occ_cnt_ovfl, 0x21),
+	CMN_EVENT_HNI(rdt_rd_occ_cnt_ovfl, 0x22),
+	CMN_EVENT_HNI(rdt_wr_occ_cnt_ovfl, 0x23),
+	CMN_EVENT_HNI(wdb_occ_cnt_ovfl,	0x24),
+	CMN_EVENT_HNI(rrt_rd_alloc,	0x25),
+	CMN_EVENT_HNI(rrt_wr_alloc,	0x26),
+	CMN_EVENT_HNI(rdt_rd_alloc,	0x27),
+	CMN_EVENT_HNI(rdt_wr_alloc,	0x28),
+	CMN_EVENT_HNI(wdb_alloc,	0x29),
+	CMN_EVENT_HNI(txrsp_retryack,	0x2a),
+	CMN_EVENT_HNI(arvalid_no_arready, 0x2b),
+	CMN_EVENT_HNI(arready_no_arvalid, 0x2c),
+	CMN_EVENT_HNI(awvalid_no_awready, 0x2d),
+	CMN_EVENT_HNI(awready_no_awvalid, 0x2e),
+	CMN_EVENT_HNI(wvalid_no_wready,	0x2f),
+	CMN_EVENT_HNI(txdat_stall,	0x30),
+	CMN_EVENT_HNI(nonpcie_serialization, 0x31),
+	CMN_EVENT_HNI(pcie_serialization, 0x32),
+
+	CMN_EVENT_XP(txflit_valid,	0x01),
+	CMN_EVENT_XP(txflit_stall,	0x02),
+	CMN_EVENT_XP(partial_dat_flit,	0x03),
+	/* We treat watchpoints as a special made-up class of XP events */
+	CMN_EVENT_ATTR(watchpoint_up, CMN_TYPE_WP, 0, 0),
+	CMN_EVENT_ATTR(watchpoint_down, CMN_TYPE_WP, 2, 0),
+
+	CMN_EVENT_SBSX(rd_req,		0x01),
+	CMN_EVENT_SBSX(wr_req,		0x02),
+	CMN_EVENT_SBSX(cmo_req,		0x03),
+	CMN_EVENT_SBSX(txrsp_retryack,	0x04),
+	CMN_EVENT_SBSX(txdat_flitv,	0x05),
+	CMN_EVENT_SBSX(txrsp_flitv,	0x06),
+	CMN_EVENT_SBSX(rd_req_trkr_occ_cnt_ovfl, 0x11),
+	CMN_EVENT_SBSX(wr_req_trkr_occ_cnt_ovfl, 0x12),
+	CMN_EVENT_SBSX(cmo_req_trkr_occ_cnt_ovfl, 0x13),
+	CMN_EVENT_SBSX(wdb_occ_cnt_ovfl, 0x14),
+	CMN_EVENT_SBSX(rd_axi_trkr_occ_cnt_ovfl, 0x15),
+	CMN_EVENT_SBSX(cmo_axi_trkr_occ_cnt_ovfl, 0x16),
+	CMN_EVENT_SBSX(arvalid_no_arready, 0x21),
+	CMN_EVENT_SBSX(awvalid_no_awready, 0x22),
+	CMN_EVENT_SBSX(wvalid_no_wready, 0x23),
+	CMN_EVENT_SBSX(txdat_stall,	0x24),
+	CMN_EVENT_SBSX(txrsp_stall,	0x25),
+
+	CMN_EVENT_RNID(s0_rdata_beats,	0x01),
+	CMN_EVENT_RNID(s1_rdata_beats,	0x02),
+	CMN_EVENT_RNID(s2_rdata_beats,	0x03),
+	CMN_EVENT_RNID(rxdat_flits,	0x04),
+	CMN_EVENT_RNID(txdat_flits,	0x05),
+	CMN_EVENT_RNID(txreq_flits_total, 0x06),
+	CMN_EVENT_RNID(txreq_flits_retried, 0x07),
+	CMN_EVENT_RNID(rrt_occ_ovfl,	0x08),
+	CMN_EVENT_RNID(wrt_occ_ovfl,	0x09),
+	CMN_EVENT_RNID(txreq_flits_replayed, 0x0a),
+	CMN_EVENT_RNID(wrcancel_sent,	0x0b),
+	CMN_EVENT_RNID(s0_wdata_beats,	0x0c),
+	CMN_EVENT_RNID(s1_wdata_beats,	0x0d),
+	CMN_EVENT_RNID(s2_wdata_beats,	0x0e),
+	CMN_EVENT_RNID(rrt_alloc,	0x0f),
+	CMN_EVENT_RNID(wrt_alloc,	0x10),
+	CMN_EVENT_RNID(rdb_unord,	0x11),
+	CMN_EVENT_RNID(rdb_replay,	0x12),
+	CMN_EVENT_RNID(rdb_hybrid,	0x13),
+	CMN_EVENT_RNID(rdb_ord,		0x14),
+
+	NULL
+};
+
+static const struct attribute_group arm_cmn_event_attrs_group = {
+	.name = "events",
+	.attrs = arm_cmn_event_attrs,
+	.is_visible = arm_cmn_event_attr_is_visible,
+};
+
+static ssize_t arm_cmn_format_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct arm_cmn_format_attr *fmt = container_of(attr, typeof(*fmt), attr);
+	int lo = __ffs(fmt->field), hi = __fls(fmt->field);
+
+	if (lo == hi)
+		return snprintf(buf, PAGE_SIZE, "config:%d\n", lo);
+
+	if (!fmt->config)
+		return snprintf(buf, PAGE_SIZE, "config:%d-%d\n", lo, hi);
+
+	return snprintf(buf, PAGE_SIZE, "config%d:%d-%d\n", fmt->config, lo, hi);
+}
+
+#define _CMN_FORMAT_ATTR(_name, _cfg, _fld)				\
+	(&((struct arm_cmn_format_attr[]) {{				\
+		.attr = __ATTR(_name, 0444, arm_cmn_format_show, NULL),	\
+		.config = _cfg,						\
+		.field = _fld,						\
+	}})[0].attr.attr)
+#define CMN_FORMAT_ATTR(_name, _fld)	_CMN_FORMAT_ATTR(_name, 0, _fld)
+
+static struct attribute *arm_cmn_format_attrs[] = {
+	CMN_FORMAT_ATTR(type, CMN_CONFIG_TYPE),
+	CMN_FORMAT_ATTR(eventid, CMN_CONFIG_EVENTID),
+	CMN_FORMAT_ATTR(occupid, CMN_CONFIG_OCCUPID),
+	CMN_FORMAT_ATTR(bynodeid, CMN_CONFIG_BYNODEID),
+	CMN_FORMAT_ATTR(nodeid, CMN_CONFIG_NODEID),
+
+	CMN_FORMAT_ATTR(wp_dev_sel, CMN_CONFIG_WP_DEV_SEL),
+	CMN_FORMAT_ATTR(wp_chn_sel, CMN_CONFIG_WP_CHN_SEL),
+	CMN_FORMAT_ATTR(wp_grp, CMN_CONFIG_WP_GRP),
+	CMN_FORMAT_ATTR(wp_exclusive, CMN_CONFIG_WP_EXCLUSIVE),
+	CMN_FORMAT_ATTR(wp_combine, CMN_CONFIG_WP_COMBINE),
+
+	_CMN_FORMAT_ATTR(wp_val, 1, CMN_CONFIG1_WP_VAL),
+	_CMN_FORMAT_ATTR(wp_mask, 2, CMN_CONFIG2_WP_MASK),
+
+	NULL
+};
+
+static const struct attribute_group arm_cmn_format_attrs_group = {
+	.name = "format",
+	.attrs = arm_cmn_format_attrs,
+};
+
+static ssize_t arm_cmn_cpumask_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct arm_cmn *cmn = to_cmn(dev_get_drvdata(dev));
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(cmn->cpu));
+}
+
+static struct device_attribute arm_cmn_cpumask_attr =
+		__ATTR(cpumask, 0444, arm_cmn_cpumask_show, NULL);
+
+static struct attribute *arm_cmn_cpumask_attrs[] = {
+	&arm_cmn_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group arm_cmn_cpumask_attr_group = {
+	.attrs = arm_cmn_cpumask_attrs,
+};
+
+static const struct attribute_group *arm_cmn_attr_groups[] = {
+	&arm_cmn_event_attrs_group,
+	&arm_cmn_format_attrs_group,
+	&arm_cmn_cpumask_attr_group,
+	NULL
+};
+
+static int arm_cmn_wp_idx(struct perf_event *event)
+{
+	return CMN_EVENT_EVENTID(event) + CMN_EVENT_WP_GRP(event);
+}
+
+static u32 arm_cmn_wp_config(struct perf_event *event)
+{
+	u32 config;
+	u32 dev = CMN_EVENT_WP_DEV_SEL(event);
+	u32 chn = CMN_EVENT_WP_CHN_SEL(event);
+	u32 grp = CMN_EVENT_WP_GRP(event);
+	u32 exc = CMN_EVENT_WP_EXCLUSIVE(event);
+	u32 combine = CMN_EVENT_WP_COMBINE(event);
+
+	config = FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_DEV_SEL, dev) |
+		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_CHN_SEL, chn) |
+		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_GRP, grp) |
+		 FIELD_PREP(CMN_DTM_WPn_CONFIG_WP_EXCLUSIVE, exc);
+	if (combine && !grp)
+		config |= CMN_DTM_WPn_CONFIG_WP_COMBINE;
+
+	return config;
+}
+
+static void arm_cmn_set_state(struct arm_cmn *cmn, u32 state)
+{
+	if (!cmn->state)
+		writel_relaxed(0, cmn->dtc[0].base + CMN_DT_PMCR);
+	cmn->state |= state;
+}
+
+static void arm_cmn_clear_state(struct arm_cmn *cmn, u32 state)
+{
+	cmn->state &= ~state;
+	if (!cmn->state)
+		writel_relaxed(CMN_DT_PMCR_PMU_EN | CMN_DT_PMCR_OVFL_INTR_EN,
+			       cmn->dtc[0].base + CMN_DT_PMCR);
+}
+
+static void arm_cmn_pmu_enable(struct pmu *pmu)
+{
+	arm_cmn_clear_state(to_cmn(pmu), CMN_STATE_DISABLED);
+}
+
+static void arm_cmn_pmu_disable(struct pmu *pmu)
+{
+	arm_cmn_set_state(to_cmn(pmu), CMN_STATE_DISABLED);
+}
+
+static u64 arm_cmn_read_dtm(struct arm_cmn *cmn, struct arm_cmn_hw_event *hw,
+			    bool snapshot)
+{
+	struct arm_cmn_node *dn;
+	unsigned int i, offset;
+	u64 count = 0;
+
+	offset = snapshot ? CMN_DTM_PMEVCNTSR : CMN_DTM_PMEVCNT;
+	for_each_hw_dn(hw, dn, i) {
+		struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn);
+		int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
+		u64 reg = readq_relaxed(xp->pmu_base + offset);
+		u16 dtm_count = reg >> (dtm_idx * 16);
+
+		count += dtm_count;
+	}
+	return count;
+}
+
+static u64 arm_cmn_read_cc(struct arm_cmn_dtc *dtc)
+{
+	u64 val = readq_relaxed(dtc->base + CMN_DT_PMCCNTR);
+
+	writeq_relaxed(CMN_CC_INIT, dtc->base + CMN_DT_PMCCNTR);
+	return (val - CMN_CC_INIT) & ((CMN_CC_INIT << 1) - 1);
+}
+
+static u32 arm_cmn_read_counter(struct arm_cmn_dtc *dtc, int idx)
+{
+	u32 val, pmevcnt = CMN_DT_PMEVCNT(idx);
+
+	val = readl_relaxed(dtc->base + pmevcnt);
+	writel_relaxed(CMN_COUNTER_INIT, dtc->base + pmevcnt);
+	return val - CMN_COUNTER_INIT;
+}
+
+static void arm_cmn_init_counter(struct perf_event *event)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	unsigned int i, pmevcnt = CMN_DT_PMEVCNT(hw->dtc_idx);
+	u64 count;
+
+	for (i = 0; hw->dtcs_used & (1U << i); i++) {
+		writel_relaxed(CMN_COUNTER_INIT, cmn->dtc[i].base + pmevcnt);
+		cmn->dtc[i].counters[hw->dtc_idx] = event;
+	}
+
+	count = arm_cmn_read_dtm(cmn, hw, false);
+	local64_set(&event->hw.prev_count, count);
+}
+
+static void arm_cmn_event_read(struct perf_event *event)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	u64 delta, new, prev;
+	unsigned long flags;
+	unsigned int i;
+
+	if (hw->dtc_idx == CMN_DT_NUM_COUNTERS) {
+		i = __ffs(hw->dtcs_used);
+		delta = arm_cmn_read_cc(cmn->dtc + i);
+		local64_add(delta, &event->count);
+		return;
+	}
+	new = arm_cmn_read_dtm(cmn, hw, false);
+	prev = local64_xchg(&event->hw.prev_count, new);
+
+	delta = new - prev;
+
+	local_irq_save(flags);
+	for (i = 0; hw->dtcs_used & (1U << i); i++) {
+		new = arm_cmn_read_counter(cmn->dtc + i, hw->dtc_idx);
+		delta += new << 16;
+	}
+	local_irq_restore(flags);
+	local64_add(delta, &event->count);
+}
+
+static void arm_cmn_event_start(struct perf_event *event, int flags)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_node *dn;
+	enum cmn_node_type type = CMN_EVENT_TYPE(event);
+	int i;
+
+	if (type == CMN_TYPE_DTC) {
+		i = __ffs(hw->dtcs_used);
+		writeq_relaxed(CMN_CC_INIT, cmn->dtc[i].base + CMN_DT_PMCCNTR);
+		cmn->dtc[i].cc_active = true;
+	} else if (type == CMN_TYPE_WP) {
+		int wp_idx = arm_cmn_wp_idx(event);
+		u64 val = CMN_EVENT_WP_VAL(event);
+		u64 mask = CMN_EVENT_WP_MASK(event);
+
+		for_each_hw_dn(hw, dn, i) {
+			writeq_relaxed(val, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx));
+			writeq_relaxed(mask, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx));
+		}
+	} else for_each_hw_dn(hw, dn, i) {
+		int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
+
+		dn->event[dtm_idx] = CMN_EVENT_EVENTID(event);
+		writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL);
+	}
+}
+
+static void arm_cmn_event_stop(struct perf_event *event, int flags)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_node *dn;
+	enum cmn_node_type type = CMN_EVENT_TYPE(event);
+	int i;
+
+	if (type == CMN_TYPE_DTC) {
+		i = __ffs(hw->dtcs_used);
+		cmn->dtc[i].cc_active = false;
+	} else if (type == CMN_TYPE_WP) {
+		int wp_idx = arm_cmn_wp_idx(event);
+
+		for_each_hw_dn(hw, dn, i) {
+			writeq_relaxed(0, dn->pmu_base + CMN_DTM_WPn_MASK(wp_idx));
+			writeq_relaxed(~0ULL, dn->pmu_base + CMN_DTM_WPn_VAL(wp_idx));
+		}
+	} else for_each_hw_dn(hw, dn, i) {
+		int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
+
+		dn->event[dtm_idx] = 0;
+		writel_relaxed(le32_to_cpu(dn->event_sel), dn->pmu_base + CMN_PMU_EVENT_SEL);
+	}
+
+	arm_cmn_event_read(event);
+}
+
+struct arm_cmn_val {
+	u8 dtm_count[CMN_MAX_XPS];
+	u8 occupid[CMN_MAX_XPS];
+	u8 wp[CMN_MAX_XPS][4];
+	int dtc_count;
+	bool cycles;
+};
+
+static void arm_cmn_val_add_event(struct arm_cmn_val *val, struct perf_event *event)
+{
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_node *dn;
+	enum cmn_node_type type;
+	int i;
+	u8 occupid;
+
+	if (is_software_event(event))
+		return;
+
+	type = CMN_EVENT_TYPE(event);
+	if (type == CMN_TYPE_DTC) {
+		val->cycles = true;
+		return;
+	}
+
+	val->dtc_count++;
+	if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event)))
+		occupid = CMN_EVENT_OCCUPID(event) + 1;
+	else
+		occupid = 0;
+
+	for_each_hw_dn(hw, dn, i) {
+		int wp_idx, xp = arm_cmn_node_to_xp(dn)->logid;
+
+		val->dtm_count[xp]++;
+		val->occupid[xp] = occupid;
+
+		if (type != CMN_TYPE_WP)
+			continue;
+
+		wp_idx = arm_cmn_wp_idx(event);
+		val->wp[xp][wp_idx] = CMN_EVENT_WP_COMBINE(event) + 1;
+	}
+}
+
+static int arm_cmn_validate_group(struct perf_event *event)
+{
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_node *dn;
+	struct perf_event *sibling, *leader = event->group_leader;
+	enum cmn_node_type type;
+	struct arm_cmn_val val;
+	int i;
+	u8 occupid;
+
+	if (leader == event)
+		return 0;
+
+	if (event->pmu != leader->pmu && !is_software_event(leader))
+		return -EINVAL;
+
+	memset(&val, 0, sizeof(val));
+
+	arm_cmn_val_add_event(&val, leader);
+	for_each_sibling_event(sibling, leader)
+		arm_cmn_val_add_event(&val, sibling);
+
+	type = CMN_EVENT_TYPE(event);
+	if (type == CMN_TYPE_DTC)
+		return val.cycles ? -EINVAL : 0;
+
+	if (val.dtc_count == CMN_DT_NUM_COUNTERS)
+		return -EINVAL;
+
+	if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event)))
+		occupid = CMN_EVENT_OCCUPID(event) + 1;
+	else
+		occupid = 0;
+
+	for_each_hw_dn(hw, dn, i) {
+		int wp_idx, wp_cmb, xp = arm_cmn_node_to_xp(dn)->logid;
+
+		if (val.dtm_count[xp] == CMN_DTM_NUM_COUNTERS)
+			return -EINVAL;
+
+		if (occupid && val.occupid[xp] && occupid != val.occupid[xp])
+			return -EINVAL;
+
+		if (type != CMN_TYPE_WP)
+			continue;
+
+		wp_idx = arm_cmn_wp_idx(event);
+		if (val.wp[xp][wp_idx])
+			return -EINVAL;
+
+		wp_cmb = val.wp[xp][wp_idx ^ 1];
+		if (wp_cmb && wp_cmb != CMN_EVENT_WP_COMBINE(event) + 1)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int arm_cmn_event_init(struct perf_event *event)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	enum cmn_node_type type;
+	unsigned int i;
+	bool bynodeid;
+	u16 nodeid, eventid;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	event->cpu = cmn->cpu;
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	type = CMN_EVENT_TYPE(event);
+	/* DTC events (i.e. cycles) already have everything they need */
+	if (type == CMN_TYPE_DTC)
+		return 0;
+
+	/* For watchpoints we need the actual XP node here */
+	if (type == CMN_TYPE_WP) {
+		type = CMN_TYPE_XP;
+		/* ...and we need a "real" direction */
+		eventid = CMN_EVENT_EVENTID(event);
+		if (eventid != CMN_WP_UP && eventid != CMN_WP_DOWN)
+			return -EINVAL;
+	}
+
+	bynodeid = CMN_EVENT_BYNODEID(event);
+	nodeid = CMN_EVENT_NODEID(event);
+
+	hw->dn = arm_cmn_node(cmn, type);
+	for (i = hw->dn - cmn->dns; i < cmn->num_dns && cmn->dns[i].type == type; i++) {
+		if (!bynodeid) {
+			hw->num_dns++;
+		} else if (cmn->dns[i].id != nodeid) {
+			hw->dn++;
+		} else {
+			hw->num_dns = 1;
+			break;
+		}
+	}
+
+	if (!hw->num_dns) {
+		int bits = arm_cmn_xyidbits(cmn);
+
+		dev_dbg(cmn->dev, "invalid node 0x%x (%d,%d,%d,%d) type 0x%x\n",
+			nodeid, CMN_NODEID_X(nodeid, bits), CMN_NODEID_Y(nodeid, bits),
+			CMN_NODEID_PID(nodeid), CMN_NODEID_DEVID(nodeid), type);
+		return -EINVAL;
+	}
+	/*
+	 * By assuming events count in all DTC domains, we cunningly avoid
+	 * needing to know anything about how XPs are assigned to domains.
+	 */
+	hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
+
+	return arm_cmn_validate_group(event);
+}
+
+static void arm_cmn_event_clear(struct arm_cmn *cmn, struct perf_event *event,
+				int i)
+{
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	enum cmn_node_type type = CMN_EVENT_TYPE(event);
+
+	while (i--) {
+		struct arm_cmn_node *xp = arm_cmn_node_to_xp(hw->dn + i);
+		unsigned int dtm_idx = arm_cmn_get_index(hw->dtm_idx, i);
+
+		if (type == CMN_TYPE_WP)
+			hw->dn[i].wp_event[arm_cmn_wp_idx(event)] = -1;
+
+		if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event)))
+			hw->dn[i].occupid_count--;
+
+		xp->pmu_config_low &= ~CMN__PMEVCNT_PAIRED(dtm_idx);
+		writel_relaxed(xp->pmu_config_low, xp->pmu_base + CMN_DTM_PMU_CONFIG);
+	}
+	memset(hw->dtm_idx, 0, sizeof(hw->dtm_idx));
+
+	for (i = 0; hw->dtcs_used & (1U << i); i++)
+		cmn->dtc[i].counters[hw->dtc_idx] = NULL;
+}
+
+static int arm_cmn_event_add(struct perf_event *event, int flags)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	struct arm_cmn_dtc *dtc = &cmn->dtc[0];
+	struct arm_cmn_node *dn;
+	enum cmn_node_type type = CMN_EVENT_TYPE(event);
+	unsigned int i, dtc_idx, input_sel;
+
+	if (type == CMN_TYPE_DTC) {
+		i = 0;
+		while (cmn->dtc[i].cycles)
+			if (++i == cmn->num_dtcs)
+				return -ENOSPC;
+
+		cmn->dtc[i].cycles = event;
+		hw->dtc_idx = CMN_DT_NUM_COUNTERS;
+		hw->dtcs_used = 1U << i;
+
+		if (flags & PERF_EF_START)
+			arm_cmn_event_start(event, 0);
+		return 0;
+	}
+
+	/* Grab a free global counter first... */
+	dtc_idx = 0;
+	while (dtc->counters[dtc_idx])
+		if (++dtc_idx == CMN_DT_NUM_COUNTERS)
+			return -ENOSPC;
+
+	hw->dtc_idx = dtc_idx;
+
+	/* ...then the local counters to feed it. */
+	for_each_hw_dn(hw, dn, i) {
+		struct arm_cmn_node *xp = arm_cmn_node_to_xp(dn);
+		unsigned int dtm_idx, shift;
+		u64 reg;
+
+		dtm_idx = 0;
+		while (xp->pmu_config_low & CMN__PMEVCNT_PAIRED(dtm_idx))
+			if (++dtm_idx == CMN_DTM_NUM_COUNTERS)
+				goto free_dtms;
+
+		if (type == CMN_TYPE_XP) {
+			input_sel = CMN__PMEVCNT0_INPUT_SEL_XP + dtm_idx;
+		} else if (type == CMN_TYPE_WP) {
+			int tmp, wp_idx = arm_cmn_wp_idx(event);
+			u32 cfg = arm_cmn_wp_config(event);
+
+			if (dn->wp_event[wp_idx] >= 0)
+				goto free_dtms;
+
+			tmp = dn->wp_event[wp_idx ^ 1];
+			if (tmp >= 0 && CMN_EVENT_WP_COMBINE(event) !=
+					CMN_EVENT_WP_COMBINE(dtc->counters[tmp]))
+				goto free_dtms;
+
+			input_sel = CMN__PMEVCNT0_INPUT_SEL_WP + wp_idx;
+			dn->wp_event[wp_idx] = dtc_idx;
+			writel_relaxed(cfg, dn->pmu_base + CMN_DTM_WPn_CONFIG(wp_idx));
+		} else {
+			unsigned int port = CMN_NODEID_PID(dn->id);
+			unsigned int dev = CMN_NODEID_DEVID(dn->id);
+
+			input_sel = CMN__PMEVCNT0_INPUT_SEL_DEV + dtm_idx +
+				    (port << 4) + (dev << 2);
+
+			if (arm_cmn_is_occup_event(type, CMN_EVENT_EVENTID(event))) {
+				int occupid = CMN_EVENT_OCCUPID(event);
+
+				if (dn->occupid_count == 0) {
+					dn->occupid_val = occupid;
+					writel_relaxed(occupid,
+						       dn->pmu_base + CMN_PMU_EVENT_SEL + 4);
+				} else if (dn->occupid_val != occupid) {
+					goto free_dtms;
+				}
+				dn->occupid_count++;
+			}
+		}
+
+		arm_cmn_set_index(hw->dtm_idx, i, dtm_idx);
+
+		xp->input_sel[dtm_idx] = input_sel;
+		shift = CMN__PMEVCNTn_GLOBAL_NUM_SHIFT(dtm_idx);
+		xp->pmu_config_low &= ~(CMN__PMEVCNT0_GLOBAL_NUM << shift);
+		xp->pmu_config_low |= FIELD_PREP(CMN__PMEVCNT0_GLOBAL_NUM, dtc_idx) << shift;
+		xp->pmu_config_low |= CMN__PMEVCNT_PAIRED(dtm_idx);
+		reg = (u64)le32_to_cpu(xp->pmu_config_high) << 32 | xp->pmu_config_low;
+		writeq_relaxed(reg, xp->pmu_base + CMN_DTM_PMU_CONFIG);
+	}
+
+	/* Go go go! */
+	arm_cmn_init_counter(event);
+
+	if (flags & PERF_EF_START)
+		arm_cmn_event_start(event, 0);
+
+	return 0;
+
+free_dtms:
+	arm_cmn_event_clear(cmn, event, i);
+	return -ENOSPC;
+}
+
+static void arm_cmn_event_del(struct perf_event *event, int flags)
+{
+	struct arm_cmn *cmn = to_cmn(event->pmu);
+	struct arm_cmn_hw_event *hw = to_cmn_hw(event);
+	enum cmn_node_type type = CMN_EVENT_TYPE(event);
+
+	arm_cmn_event_stop(event, PERF_EF_UPDATE);
+
+	if (type == CMN_TYPE_DTC)
+		cmn->dtc[__ffs(hw->dtcs_used)].cycles = NULL;
+	else
+		arm_cmn_event_clear(cmn, event, hw->num_dns);
+}
+
+/*
+ * We stop the PMU for both add and read, to avoid skew across DTM counters.
+ * In theory we could use snapshots to read without stopping, but then it
+ * becomes a lot trickier to deal with overlow and racing against interrupts,
+ * plus it seems they don't work properly on some hardware anyway :(
+ */
+static void arm_cmn_start_txn(struct pmu *pmu, unsigned int flags)
+{
+	arm_cmn_set_state(to_cmn(pmu), CMN_STATE_TXN);
+}
+
+static void arm_cmn_end_txn(struct pmu *pmu)
+{
+	arm_cmn_clear_state(to_cmn(pmu), CMN_STATE_TXN);
+}
+
+static int arm_cmn_commit_txn(struct pmu *pmu)
+{
+	arm_cmn_end_txn(pmu);
+	return 0;
+}
+
+static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct arm_cmn *cmn;
+	unsigned int target;
+
+	cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node);
+	if (cpu != cmn->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&cmn->pmu, cpu, target);
+	cmn->cpu = target;
+	return 0;
+}
+
+static irqreturn_t arm_cmn_handle_irq(int irq, void *dev_id)
+{
+	struct arm_cmn_dtc *dtc = dev_id;
+	irqreturn_t ret = IRQ_NONE;
+
+	for (;;) {
+		u32 status = readl_relaxed(dtc->base + CMN_DT_PMOVSR);
+		u64 delta;
+		int i;
+
+		for (i = 0; i < CMN_DTM_NUM_COUNTERS; i++) {
+			if (status & (1U << i)) {
+				ret = IRQ_HANDLED;
+				if (WARN_ON(!dtc->counters[i]))
+					continue;
+				delta = (u64)arm_cmn_read_counter(dtc, i) << 16;
+				local64_add(delta, &dtc->counters[i]->count);
+			}
+		}
+
+		if (status & (1U << CMN_DT_NUM_COUNTERS)) {
+			ret = IRQ_HANDLED;
+			if (dtc->cc_active && !WARN_ON(!dtc->cycles)) {
+				delta = arm_cmn_read_cc(dtc);
+				local64_add(delta, &dtc->cycles->count);
+			}
+		}
+
+		writel_relaxed(status, dtc->base + CMN_DT_PMOVSR_CLR);
+
+		if (!dtc->irq_friend)
+			return ret;
+		dtc += dtc->irq_friend;
+	}
+}
+
+/* We can reasonably accommodate DTCs of the same CMN sharing IRQs */
+static int arm_cmn_init_irqs(struct arm_cmn *cmn)
+{
+	int i, j, irq, err;
+
+	for (i = 0; i < cmn->num_dtcs; i++) {
+		irq = cmn->dtc[i].irq;
+		for (j = i; j--; ) {
+			if (cmn->dtc[j].irq == irq) {
+				cmn->dtc[j].irq_friend = j - i;
+				goto next;
+			}
+		}
+		err = devm_request_irq(cmn->dev, irq, arm_cmn_handle_irq,
+				       IRQF_NOBALANCING | IRQF_NO_THREAD,
+				       dev_name(cmn->dev), &cmn->dtc[i]);
+		if (err)
+			return err;
+
+		err = irq_set_affinity_hint(irq, cpumask_of(cmn->cpu));
+		if (err)
+			return err;
+	next:
+		; /* isn't C great? */
+	}
+	return 0;
+}
+
+static void arm_cmn_init_dtm(struct arm_cmn_node *xp)
+{
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		xp->wp_event[i] = -1;
+		writeq_relaxed(0, xp->pmu_base + CMN_DTM_WPn_MASK(i));
+		writeq_relaxed(~0ULL, xp->pmu_base + CMN_DTM_WPn_VAL(i));
+	}
+	xp->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN;
+	xp->dtc = -1;
+}
+
+static int arm_cmn_init_dtc(struct arm_cmn *cmn, struct arm_cmn_node *dn, int idx)
+{
+	struct arm_cmn_dtc *dtc = cmn->dtc + idx;
+	struct arm_cmn_node *xp;
+
+	dtc->base = dn->pmu_base - CMN_PMU_OFFSET;
+	dtc->irq = platform_get_irq(to_platform_device(cmn->dev), idx);
+	if (dtc->irq < 0)
+		return dtc->irq;
+
+	writel_relaxed(0, dtc->base + CMN_DT_PMCR);
+	writel_relaxed(0x1ff, dtc->base + CMN_DT_PMOVSR_CLR);
+	writel_relaxed(CMN_DT_PMCR_OVFL_INTR_EN, dtc->base + CMN_DT_PMCR);
+
+	/* We do at least know that a DTC's XP must be in that DTC's domain */
+	xp = arm_cmn_node_to_xp(dn);
+	xp->dtc = idx;
+
+	return 0;
+}
+
+static int arm_cmn_node_cmp(const void *a, const void *b)
+{
+	const struct arm_cmn_node *dna = a, *dnb = b;
+	int cmp;
+
+	cmp = dna->type - dnb->type;
+	if (!cmp)
+		cmp = dna->logid - dnb->logid;
+	return cmp;
+}
+
+static int arm_cmn_init_dtcs(struct arm_cmn *cmn)
+{
+	struct arm_cmn_node *dn;
+	int dtc_idx = 0;
+
+	cmn->dtc = devm_kcalloc(cmn->dev, cmn->num_dtcs, sizeof(cmn->dtc[0]), GFP_KERNEL);
+	if (!cmn->dtc)
+		return -ENOMEM;
+
+	sort(cmn->dns, cmn->num_dns, sizeof(cmn->dns[0]), arm_cmn_node_cmp, NULL);
+
+	cmn->xps = arm_cmn_node(cmn, CMN_TYPE_XP);
+
+	for (dn = cmn->dns; dn < cmn->dns + cmn->num_dns; dn++) {
+		if (dn->type != CMN_TYPE_XP)
+			arm_cmn_init_node_to_xp(cmn, dn);
+		else if (cmn->num_dtcs == 1)
+			dn->dtc = 0;
+
+		if (dn->type == CMN_TYPE_DTC)
+			arm_cmn_init_dtc(cmn, dn, dtc_idx++);
+
+		/* To the PMU, RN-Ds don't add anything over RN-Is, so smoosh them together */
+		if (dn->type == CMN_TYPE_RND)
+			dn->type = CMN_TYPE_RNI;
+	}
+
+	writel_relaxed(CMN_DT_DTC_CTL_DT_EN, cmn->dtc[0].base + CMN_DT_DTC_CTL);
+
+	return 0;
+}
+
+static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_cmn_node *node)
+{
+	int level;
+	u64 reg = readq_relaxed(cmn->base + offset + CMN_NODE_INFO);
+
+	node->type = FIELD_GET(CMN_NI_NODE_TYPE, reg);
+	node->id = FIELD_GET(CMN_NI_NODE_ID, reg);
+	node->logid = FIELD_GET(CMN_NI_LOGICAL_ID, reg);
+
+	node->pmu_base = cmn->base + offset + CMN_PMU_OFFSET;
+
+	if (node->type == CMN_TYPE_CFG)
+		level = 0;
+	else if (node->type == CMN_TYPE_XP)
+		level = 1;
+	else
+		level = 2;
+
+	dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6hx id:%-4hd off:%#x\n",
+			(level * 2) + 1, ' ', node->id, 5 - (level * 2), ' ',
+			node->type, node->logid, offset);
+}
+
+static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
+{
+	void __iomem *cfg_region;
+	struct arm_cmn_node cfg, *dn;
+	u16 child_count, child_poff;
+	u32 xp_offset[CMN_MAX_XPS];
+	u64 reg;
+	int i, j;
+
+	cfg_region = cmn->base + rgn_offset;
+	reg = readl_relaxed(cfg_region + CMN_CFGM_PERIPH_ID_2);
+	cmn->rev = FIELD_GET(CMN_CFGM_PID2_REVISION, reg);
+	dev_dbg(cmn->dev, "periph_id_2 revision: %d\n", cmn->rev);
+
+	arm_cmn_init_node_info(cmn, rgn_offset, &cfg);
+	if (cfg.type != CMN_TYPE_CFG)
+		return -ENODEV;
+
+	reg = readq_relaxed(cfg_region + CMN_CHILD_INFO);
+	child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg);
+	child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg);
+
+	cmn->num_xps = child_count;
+	cmn->num_dns = cmn->num_xps;
+
+	/* Pass 1: visit the XPs, enumerate their children */
+	for (i = 0; i < cmn->num_xps; i++) {
+		reg = readq_relaxed(cfg_region + child_poff + i * 8);
+		xp_offset[i] = reg & CMN_CHILD_NODE_ADDR;
+
+		reg = readq_relaxed(cmn->base + xp_offset[i] + CMN_CHILD_INFO);
+		cmn->num_dns += FIELD_GET(CMN_CI_CHILD_COUNT, reg);
+	}
+
+	/* Cheeky +1 to help terminate pointer-based iteration */
+	cmn->dns = devm_kcalloc(cmn->dev, cmn->num_dns + 1,
+				sizeof(*cmn->dns), GFP_KERNEL);
+	if (!cmn->dns)
+		return -ENOMEM;
+
+	/* Pass 2: now we can actually populate the nodes */
+	dn = cmn->dns;
+	for (i = 0; i < cmn->num_xps; i++) {
+		void __iomem *xp_region = cmn->base + xp_offset[i];
+		struct arm_cmn_node *xp = dn++;
+
+		arm_cmn_init_node_info(cmn, xp_offset[i], xp);
+		arm_cmn_init_dtm(xp);
+		/*
+		 * Thanks to the order in which XP logical IDs seem to be
+		 * assigned, we can handily infer the mesh X dimension by
+		 * looking out for the XP at (0,1) without needing to know
+		 * the exact node ID format, which we can later derive.
+		 */
+		if (xp->id == (1 << 3))
+			cmn->mesh_x = xp->logid;
+
+		reg = readq_relaxed(xp_region + CMN_CHILD_INFO);
+		child_count = FIELD_GET(CMN_CI_CHILD_COUNT, reg);
+		child_poff = FIELD_GET(CMN_CI_CHILD_PTR_OFFSET, reg);
+
+		for (j = 0; j < child_count; j++) {
+			reg = readq_relaxed(xp_region + child_poff + j * 8);
+			/*
+			 * Don't even try to touch anything external, since in general
+			 * we haven't a clue how to power up arbitrary CHI requesters.
+			 * As of CMN-600r1 these could only be RN-SAMs or CXLAs,
+			 * neither of which have any PMU events anyway.
+			 * (Actually, CXLAs do seem to have grown some events in r1p2,
+			 * but they don't go to regular XP DTMs, and they depend on
+			 * secure configuration which we can't easily deal with)
+			 */
+			if (reg & CMN_CHILD_NODE_EXTERNAL) {
+				dev_dbg(cmn->dev, "ignoring external node %llx\n", reg);
+				continue;
+			}
+
+			arm_cmn_init_node_info(cmn, reg & CMN_CHILD_NODE_ADDR, dn);
+
+			switch (dn->type) {
+			case CMN_TYPE_DTC:
+				cmn->num_dtcs++;
+				dn++;
+				break;
+			/* These guys have PMU events */
+			case CMN_TYPE_DVM:
+			case CMN_TYPE_HNI:
+			case CMN_TYPE_HNF:
+			case CMN_TYPE_SBSX:
+			case CMN_TYPE_RNI:
+			case CMN_TYPE_RND:
+			case CMN_TYPE_CXRA:
+			case CMN_TYPE_CXHA:
+				dn++;
+				break;
+			/* Nothing to see here */
+			case CMN_TYPE_RNSAM:
+			case CMN_TYPE_CXLA:
+				break;
+			/* Something has gone horribly wrong */
+			default:
+				dev_err(cmn->dev, "invalid device node type: 0x%hx\n", dn->type);
+				return -ENODEV;
+			}
+		}
+	}
+
+	/* Correct for any nodes we skipped */
+	cmn->num_dns = dn - cmn->dns;
+
+	/*
+	 * If mesh_x wasn't set during discovery then we never saw
+	 * an XP at (0,1), thus we must have an Nx1 configuration.
+	 */
+	if (!cmn->mesh_x)
+		cmn->mesh_x = cmn->num_xps;
+	cmn->mesh_y = cmn->num_xps / cmn->mesh_x;
+
+	dev_dbg(cmn->dev, "mesh %dx%d, ID width %d\n",
+		cmn->mesh_x, cmn->mesh_y, arm_cmn_xyidbits(cmn));
+
+	return 0;
+}
+
+static int arm_cmn_acpi_probe(struct platform_device *pdev, struct arm_cmn *cmn)
+{
+	struct resource *cfg, *root;
+
+	cfg = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!cfg)
+		return -EINVAL;
+
+	root = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!root)
+		return -EINVAL;
+
+	if (!resource_contains(cfg, root))
+		swap(cfg, root);
+	/*
+	 * Note that devm_ioremap_resource() is dumb and won't let the platform
+	 * device claim cfg when the ACPI companion device has already claimed
+	 * root within it. But since they *are* already both claimed in the
+	 * appropriate name, we don't really need to do it again here anyway.
+	 */
+	cmn->base = devm_ioremap(cmn->dev, cfg->start, resource_size(cfg));
+	if (!cmn->base)
+		return -ENOMEM;
+
+	return root->start - cfg->start;
+}
+
+static int arm_cmn_of_probe(struct platform_device *pdev, struct arm_cmn *cmn)
+{
+	struct device_node *np = pdev->dev.of_node;
+	u32 rootnode;
+	int ret;
+
+	cmn->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(cmn->base))
+		return PTR_ERR(cmn->base);
+
+	ret = of_property_read_u32(np, "arm,root-node", &rootnode);
+	if (ret)
+		return ret;
+
+	return rootnode;
+}
+
+static int arm_cmn_probe(struct platform_device *pdev)
+{
+	struct arm_cmn *cmn;
+	const char *name;
+	static atomic_t id;
+	int err, rootnode, this_id;
+
+	cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL);
+	if (!cmn)
+		return -ENOMEM;
+
+	cmn->dev = &pdev->dev;
+	platform_set_drvdata(pdev, cmn);
+
+	if (has_acpi_companion(cmn->dev))
+		rootnode = arm_cmn_acpi_probe(pdev, cmn);
+	else
+		rootnode = arm_cmn_of_probe(pdev, cmn);
+	if (rootnode < 0)
+		return rootnode;
+
+	err = arm_cmn_discover(cmn, rootnode);
+	if (err)
+		return err;
+
+	err = arm_cmn_init_dtcs(cmn);
+	if (err)
+		return err;
+
+	err = arm_cmn_init_irqs(cmn);
+	if (err)
+		return err;
+
+	cmn->cpu = raw_smp_processor_id();
+	cmn->pmu = (struct pmu) {
+		.module = THIS_MODULE,
+		.attr_groups = arm_cmn_attr_groups,
+		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
+		.task_ctx_nr = perf_invalid_context,
+		.pmu_enable = arm_cmn_pmu_enable,
+		.pmu_disable = arm_cmn_pmu_disable,
+		.event_init = arm_cmn_event_init,
+		.add = arm_cmn_event_add,
+		.del = arm_cmn_event_del,
+		.start = arm_cmn_event_start,
+		.stop = arm_cmn_event_stop,
+		.read = arm_cmn_event_read,
+		.start_txn = arm_cmn_start_txn,
+		.commit_txn = arm_cmn_commit_txn,
+		.cancel_txn = arm_cmn_end_txn,
+	};
+
+	this_id = atomic_fetch_inc(&id);
+	if (this_id == 0) {
+		name = "arm_cmn";
+	} else {
+		name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id);
+		if (!name)
+			return -ENOMEM;
+	}
+
+	err = cpuhp_state_add_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
+	if (err)
+		return err;
+
+	err = perf_pmu_register(&cmn->pmu, name, -1);
+	if (err)
+		cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
+	return err;
+}
+
+static int arm_cmn_remove(struct platform_device *pdev)
+{
+	struct arm_cmn *cmn = platform_get_drvdata(pdev);
+	int i;
+
+	writel_relaxed(0, cmn->dtc[0].base + CMN_DT_DTC_CTL);
+
+	perf_pmu_unregister(&cmn->pmu);
+	cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
+
+	for (i = 0; i < cmn->num_dtcs; i++)
+		irq_set_affinity_hint(cmn->dtc[i].irq, NULL);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id arm_cmn_of_match[] = {
+	{ .compatible = "arm,cmn-600", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, arm_cmn_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id arm_cmn_acpi_match[] = {
+	{ "ARMHC600", },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, arm_cmn_acpi_match);
+#endif
+
+static struct platform_driver arm_cmn_driver = {
+	.driver = {
+		.name = "arm-cmn",
+		.of_match_table = of_match_ptr(arm_cmn_of_match),
+		.acpi_match_table = ACPI_PTR(arm_cmn_acpi_match),
+	},
+	.probe = arm_cmn_probe,
+	.remove = arm_cmn_remove,
+};
+
+static int __init arm_cmn_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/arm/cmn:online", NULL,
+				      arm_cmn_pmu_offline_cpu);
+	if (ret < 0)
+		return ret;
+
+	arm_cmn_hp_state = ret;
+	ret = platform_driver_register(&arm_cmn_driver);
+	if (ret)
+		cpuhp_remove_multi_state(arm_cmn_hp_state);
+	return ret;
+}
+
+static void __exit arm_cmn_exit(void)
+{
+	platform_driver_unregister(&arm_cmn_driver);
+	cpuhp_remove_multi_state(arm_cmn_hp_state);
+}
+
+module_init(arm_cmn_init);
+module_exit(arm_cmn_exit);
+
+MODULE_AUTHOR("Robin Murphy <robin.murphy@arm.com>");
+MODULE_DESCRIPTION("Arm CMN-600 PMU driver");
+MODULE_LICENSE("GPL v2");

From 490d7b7c0845eacf5593db333fd2ae7715416e16 Mon Sep 17 00:00:00 2001
From: Alexandru Elisei <alexandru.elisei@arm.com>
Date: Thu, 24 Sep 2020 12:07:00 +0100
Subject: [PATCH 172/262] arm64: perf: Add missing ISB in
 armv8pmu_enable_counter()

Writes to the PMXEVTYPER_EL0 register are not self-synchronising. In
armv8pmu_enable_event(), the PE can reorder configuring the event type
after we have enabled the counter and the interrupt. This can lead to an
interrupt being asserted because of the previous event type that we were
counting using the same counter, not the one that we've just configured.

The same rationale applies to writes to the PMINTENSET_EL1 register. The PE
can reorder enabling the interrupt at any point in the future after we have
enabled the event.

Prevent both situations from happening by adding an ISB just before we
enable the event counter.

Fixes: 030896885ade ("arm64: Performance counters support")
Reported-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Tested-by: Sumit Garg <sumit.garg@linaro.org> (Developerbox)
Cc: Julien Thierry <julien.thierry.kdev@gmail.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20200924110706.254996-2-alexandru.elisei@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/perf_event.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 34a7cd3af6997c..e43ab350a5a2d4 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -541,6 +541,11 @@ static u32 armv8pmu_event_cnten_mask(struct perf_event *event)
 
 static inline void armv8pmu_enable_counter(u32 mask)
 {
+	/*
+	 * Make sure event configuration register writes are visible before we
+	 * enable the counter.
+	 * */
+	isb();
 	write_sysreg(mask, pmcntenset_el0);
 }
 

From 0fdf1bb75953a67e63e5055a7709c629ab6d7692 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 24 Sep 2020 12:07:01 +0100
Subject: [PATCH 173/262] arm64: perf: Avoid PMXEV* indirection

Currently we access the counter registers and their respective type
registers indirectly. This requires us to write to PMSELR, issue an ISB,
then access the relevant PMXEV* registers.

This is unfortunate, because:

* Under virtualization, accessing one register requires two traps to
  the hypervisor, even though we could access the register directly with
  a single trap.

* We have to issue an ISB which we could otherwise avoid the cost of.

* When we use NMIs, the NMI handler will have to save/restore the select
  register in case the code it preempted was attempting to access a
  counter or its type register.

We can avoid these issues by directly accessing the relevant registers.
This patch adds helpers to do so.

In armv8pmu_enable_event() we still need the ISB to prevent the PE from
reordering the write to PMINTENSET_EL1 register. If the interrupt is
enabled before we disable the counter and the new event is configured,
we might get an interrupt triggered by the previously programmed event
overflowing, but which we wrongly attribute to the event that we are
enabling. Execute an ISB after we disable the counter.

In the process, remove the comment that refers to the ARMv7 PMU.

[Julien T.: Don't inline read/write functions to avoid big code-size
	increase, remove unused read_pmevtypern function,
	fix counter index issue.]
[Alexandru E.: Removed comment, removed trailing semicolons in macros,
	added ISB]

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Tested-by: Sumit Garg <sumit.garg@linaro.org> (Developerbox)
Cc: Julien Thierry <julien.thierry.kdev@gmail.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20200924110706.254996-3-alexandru.elisei@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/perf_event.c | 99 +++++++++++++++++++++++++++++-----
 1 file changed, 85 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index e43ab350a5a2d4..39596ed4ab8695 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -371,6 +371,73 @@ static inline bool armv8pmu_event_is_chained(struct perf_event *event)
 #define	ARMV8_IDX_TO_COUNTER(x)	\
 	(((x) - ARMV8_IDX_COUNTER0) & ARMV8_PMU_COUNTER_MASK)
 
+/*
+ * This code is really good
+ */
+
+#define PMEVN_CASE(n, case_macro) \
+	case n: case_macro(n); break
+
+#define PMEVN_SWITCH(x, case_macro)				\
+	do {							\
+		switch (x) {					\
+		PMEVN_CASE(0,  case_macro);			\
+		PMEVN_CASE(1,  case_macro);			\
+		PMEVN_CASE(2,  case_macro);			\
+		PMEVN_CASE(3,  case_macro);			\
+		PMEVN_CASE(4,  case_macro);			\
+		PMEVN_CASE(5,  case_macro);			\
+		PMEVN_CASE(6,  case_macro);			\
+		PMEVN_CASE(7,  case_macro);			\
+		PMEVN_CASE(8,  case_macro);			\
+		PMEVN_CASE(9,  case_macro);			\
+		PMEVN_CASE(10, case_macro);			\
+		PMEVN_CASE(11, case_macro);			\
+		PMEVN_CASE(12, case_macro);			\
+		PMEVN_CASE(13, case_macro);			\
+		PMEVN_CASE(14, case_macro);			\
+		PMEVN_CASE(15, case_macro);			\
+		PMEVN_CASE(16, case_macro);			\
+		PMEVN_CASE(17, case_macro);			\
+		PMEVN_CASE(18, case_macro);			\
+		PMEVN_CASE(19, case_macro);			\
+		PMEVN_CASE(20, case_macro);			\
+		PMEVN_CASE(21, case_macro);			\
+		PMEVN_CASE(22, case_macro);			\
+		PMEVN_CASE(23, case_macro);			\
+		PMEVN_CASE(24, case_macro);			\
+		PMEVN_CASE(25, case_macro);			\
+		PMEVN_CASE(26, case_macro);			\
+		PMEVN_CASE(27, case_macro);			\
+		PMEVN_CASE(28, case_macro);			\
+		PMEVN_CASE(29, case_macro);			\
+		PMEVN_CASE(30, case_macro);			\
+		default: WARN(1, "Invalid PMEV* index\n");	\
+		}						\
+	} while (0)
+
+#define RETURN_READ_PMEVCNTRN(n) \
+	return read_sysreg(pmevcntr##n##_el0)
+static unsigned long read_pmevcntrn(int n)
+{
+	PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+	return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+	write_sysreg(val, pmevcntr##n##_el0)
+static void write_pmevcntrn(int n, unsigned long val)
+{
+	PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+}
+
+#define WRITE_PMEVTYPERN(n) \
+	write_sysreg(val, pmevtyper##n##_el0)
+static void write_pmevtypern(int n, unsigned long val)
+{
+	PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+}
+
 static inline u32 armv8pmu_pmcr_read(void)
 {
 	return read_sysreg(pmcr_el0);
@@ -393,17 +460,11 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
 	return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
 }
 
-static inline void armv8pmu_select_counter(int idx)
+static inline u32 armv8pmu_read_evcntr(int idx)
 {
 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
-	write_sysreg(counter, pmselr_el0);
-	isb();
-}
 
-static inline u64 armv8pmu_read_evcntr(int idx)
-{
-	armv8pmu_select_counter(idx);
-	return read_sysreg(pmxevcntr_el0);
+	return read_pmevcntrn(counter);
 }
 
 static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
@@ -471,8 +532,9 @@ static u64 armv8pmu_read_counter(struct perf_event *event)
 
 static inline void armv8pmu_write_evcntr(int idx, u64 value)
 {
-	armv8pmu_select_counter(idx);
-	write_sysreg(value, pmxevcntr_el0);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
+
+	write_pmevcntrn(counter, value);
 }
 
 static inline void armv8pmu_write_hw_counter(struct perf_event *event,
@@ -503,9 +565,10 @@ static void armv8pmu_write_counter(struct perf_event *event, u64 value)
 
 static inline void armv8pmu_write_evtype(int idx, u32 val)
 {
-	armv8pmu_select_counter(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
+
 	val &= ARMV8_PMU_EVTYPE_MASK;
-	write_sysreg(val, pmxevtyper_el0);
+	write_pmevtypern(counter, val);
 }
 
 static inline void armv8pmu_write_event_type(struct perf_event *event)
@@ -525,7 +588,10 @@ static inline void armv8pmu_write_event_type(struct perf_event *event)
 		armv8pmu_write_evtype(idx - 1, hwc->config_base);
 		armv8pmu_write_evtype(idx, chain_evt);
 	} else {
-		armv8pmu_write_evtype(idx, hwc->config_base);
+		if (idx == ARMV8_IDX_CYCLE_COUNTER)
+			write_sysreg(hwc->config_base, pmccfiltr_el0);
+		else
+			armv8pmu_write_evtype(idx, hwc->config_base);
 	}
 }
 
@@ -564,6 +630,11 @@ static inline void armv8pmu_enable_event_counter(struct perf_event *event)
 static inline void armv8pmu_disable_counter(u32 mask)
 {
 	write_sysreg(mask, pmcntenclr_el0);
+	/*
+	 * Make sure the effects of disabling the counter are visible before we
+	 * start configuring the event.
+	 */
+	isb();
 }
 
 static inline void armv8pmu_disable_event_counter(struct perf_event *event)
@@ -636,7 +707,7 @@ static void armv8pmu_enable_event(struct perf_event *event)
 	armv8pmu_disable_event_counter(event);
 
 	/*
-	 * Set event (if destined for PMNx counters).
+	 * Set event.
 	 */
 	armv8pmu_write_event_type(event);
 

From 2a0e2a02e4b719174547d6f04c27410c6fe456f5 Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 24 Sep 2020 12:07:02 +0100
Subject: [PATCH 174/262] arm64: perf: Remove PMU locking

The PMU is disabled and enabled, and the counters are programmed from
contexts where interrupts or preemption is disabled.

The functions to toggle the PMU and to program the PMU counters access the
registers directly and don't access data modified by the interrupt handler.
That, and the fact that they're always called from non-preemptible
contexts, means that we don't need to disable interrupts or use a spinlock.

[Alexandru E.: Explained why locking is not needed, removed WARN_ONs]

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Tested-by: Sumit Garg <sumit.garg@linaro.org> (Developerbox)
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20200924110706.254996-4-alexandru.elisei@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/perf_event.c | 28 ----------------------------
 1 file changed, 28 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 39596ed4ab8695..077df5aea19a58 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -691,15 +691,10 @@ static inline u32 armv8pmu_getreset_flags(void)
 
 static void armv8pmu_enable_event(struct perf_event *event)
 {
-	unsigned long flags;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
 	/*
 	 * Enable counter and interrupt, and set the counter to count
 	 * the event that we're interested in.
 	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
 	/*
 	 * Disable counter
@@ -720,21 +715,10 @@ static void armv8pmu_enable_event(struct perf_event *event)
 	 * Enable counter
 	 */
 	armv8pmu_enable_event_counter(event);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv8pmu_disable_event(struct perf_event *event)
 {
-	unsigned long flags;
-	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
-	/*
-	 * Disable counter and interrupt
-	 */
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
-
 	/*
 	 * Disable counter
 	 */
@@ -744,30 +728,18 @@ static void armv8pmu_disable_event(struct perf_event *event)
 	 * Disable interrupt for this counter
 	 */
 	armv8pmu_disable_event_irq(event);
-
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv8pmu_start(struct arm_pmu *cpu_pmu)
 {
-	unsigned long flags;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
 	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
 {
-	unsigned long flags;
-	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
-
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
 	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)

From 05ab72813340d11205556c0d1bc08e6857a3856c Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 24 Sep 2020 12:07:03 +0100
Subject: [PATCH 175/262] arm64: perf: Defer irq_work to IPI_IRQ_WORK

When handling events, armv8pmu_handle_irq() calls perf_event_overflow(),
and subsequently calls irq_work_run() to handle any work queued by
perf_event_overflow(). As perf_event_overflow() raises IPI_IRQ_WORK when
queuing the work, this isn't strictly necessary and the work could be
handled as part of the IPI_IRQ_WORK handler.

In the common case the IPI handler will run immediately after the PMU IRQ
handler, and where the PE is heavily loaded with interrupts other handlers
may run first, widening the window where some counters are disabled.

In practice this window is unlikely to be a significant issue, and removing
the call to irq_work_run() would make the PMU IRQ handler NMI safe in
addition to making it simpler, so let's do that.

[Alexandru E.: Reworded commit message]

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Cc: Julien Thierry <julien.thierry.kdev@gmail.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20200924110706.254996-5-alexandru.elisei@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/perf_event.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 077df5aea19a58..3605f77ad4df17 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -792,20 +792,16 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
 		if (!armpmu_event_set_period(event))
 			continue;
 
+		/*
+		 * Perf event overflow will queue the processing of the event as
+		 * an irq_work which will be taken care of in the handling of
+		 * IPI_IRQ_WORK.
+		 */
 		if (perf_event_overflow(event, &data, regs))
 			cpu_pmu->disable(event);
 	}
 	armv8pmu_start(cpu_pmu);
 
-	/*
-	 * Handle the pending perf events.
-	 *
-	 * Note: this call *must* be run with interrupts disabled. For
-	 * platforms that can have the PMU interrupts raised as an NMI, this
-	 * will not work.
-	 */
-	irq_work_run();
-
 	return IRQ_HANDLED;
 }
 

From 95e92e45a454a10a8114294d0f7aec930fb85891 Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 24 Sep 2020 12:07:04 +0100
Subject: [PATCH 176/262] KVM: arm64: pmu: Make overflow handler NMI safe

kvm_vcpu_kick() is not NMI safe. When the overflow handler is called from
NMI context, defer waking the vcpu to an irq_work queue.

A vcpu can be freed while it's not running by kvm_destroy_vm(). Prevent
running the irq_work for a non-existent vcpu by calling irq_work_sync() on
the PMU destroy path.

[Alexandru E.: Added irq_work_sync()]

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Tested-by: Sumit Garg <sumit.garg@linaro.org> (Developerbox)
Cc: Julien Thierry <julien.thierry.kdev@gmail.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Suzuki K Pouloze <suzuki.poulose@arm.com>
Cc: kvm@vger.kernel.org
Cc: kvmarm@lists.cs.columbia.edu
Link: https://lore.kernel.org/r/20200924110706.254996-6-alexandru.elisei@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/pmu-emul.c | 26 +++++++++++++++++++++++++-
 include/kvm/arm_pmu.h     |  1 +
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index f0d0312c0a552c..81916e360b1e96 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -269,6 +269,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
 		kvm_pmu_release_perf_event(&pmu->pmc[i]);
+	irq_work_sync(&vcpu->arch.pmu.overflow_work);
 }
 
 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
@@ -433,6 +434,22 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
 	kvm_pmu_update_state(vcpu);
 }
 
+/**
+ * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
+ * to the event.
+ * This is why we need a callback to do it once outside of the NMI context.
+ */
+static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_pmu *pmu;
+
+	pmu = container_of(work, struct kvm_pmu, overflow_work);
+	vcpu = kvm_pmc_to_vcpu(pmu->pmc);
+
+	kvm_vcpu_kick(vcpu);
+}
+
 /**
  * When the perf event overflows, set the overflow status and inform the vcpu.
  */
@@ -465,7 +482,11 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
 
 	if (kvm_pmu_overflow_status(vcpu)) {
 		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
-		kvm_vcpu_kick(vcpu);
+
+		if (!in_nmi())
+			kvm_vcpu_kick(vcpu);
+		else
+			irq_work_queue(&vcpu->arch.pmu.overflow_work);
 	}
 
 	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
@@ -764,6 +785,9 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
 			return ret;
 	}
 
+	init_irq_work(&vcpu->arch.pmu.overflow_work,
+		      kvm_pmu_perf_overflow_notify_vcpu);
+
 	vcpu->arch.pmu.created = true;
 	return 0;
 }
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 6db030439e29c2..dbf4f08d42e52a 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -27,6 +27,7 @@ struct kvm_pmu {
 	bool ready;
 	bool created;
 	bool irq_level;
+	struct irq_work overflow_work;
 };
 
 #define kvm_arm_pmu_v3_ready(v)		((v)->arch.pmu.ready)

From f76b130bdb8949eac002b8e0ddb85576ed137838 Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 24 Sep 2020 12:07:05 +0100
Subject: [PATCH 177/262] arm_pmu: Introduce pmu_irq_ops

Currently the PMU interrupt can either be a normal irq or a percpu irq.
Supporting NMI will introduce two cases for each existing one. It becomes
a mess of 'if's when managing the interrupt.

Define sets of callbacks for operations commonly done on the interrupt. The
appropriate set of callbacks is selected at interrupt request time and
simplifies interrupt enabling/disabling and freeing.

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Tested-by: Sumit Garg <sumit.garg@linaro.org> (Developerbox)
Cc: Julien Thierry <julien.thierry.kdev@gmail.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20200924110706.254996-7-alexandru.elisei@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu.c | 90 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 74 insertions(+), 16 deletions(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index df352b334ea77a..a770726e98d4d9 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -26,8 +26,46 @@
 
 #include <asm/irq_regs.h>
 
+static int armpmu_count_irq_users(const int irq);
+
+struct pmu_irq_ops {
+	void (*enable_pmuirq)(unsigned int irq);
+	void (*disable_pmuirq)(unsigned int irq);
+	void (*free_pmuirq)(unsigned int irq, int cpu, void __percpu *devid);
+};
+
+static void armpmu_free_pmuirq(unsigned int irq, int cpu, void __percpu *devid)
+{
+	free_irq(irq, per_cpu_ptr(devid, cpu));
+}
+
+static const struct pmu_irq_ops pmuirq_ops = {
+	.enable_pmuirq = enable_irq,
+	.disable_pmuirq = disable_irq_nosync,
+	.free_pmuirq = armpmu_free_pmuirq
+};
+
+static void armpmu_enable_percpu_pmuirq(unsigned int irq)
+{
+	enable_percpu_irq(irq, IRQ_TYPE_NONE);
+}
+
+static void armpmu_free_percpu_pmuirq(unsigned int irq, int cpu,
+				   void __percpu *devid)
+{
+	if (armpmu_count_irq_users(irq) == 1)
+		free_percpu_irq(irq, devid);
+}
+
+static const struct pmu_irq_ops percpu_pmuirq_ops = {
+	.enable_pmuirq = armpmu_enable_percpu_pmuirq,
+	.disable_pmuirq = disable_percpu_irq,
+	.free_pmuirq = armpmu_free_percpu_pmuirq
+};
+
 static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
 static DEFINE_PER_CPU(int, cpu_irq);
+static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops);
 
 static inline u64 arm_pmu_event_max_period(struct perf_event *event)
 {
@@ -544,6 +582,23 @@ static int armpmu_count_irq_users(const int irq)
 	return count;
 }
 
+static const struct pmu_irq_ops *armpmu_find_irq_ops(int irq)
+{
+	const struct pmu_irq_ops *ops = NULL;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(cpu_irq, cpu) != irq)
+			continue;
+
+		ops = per_cpu(cpu_irq_ops, cpu);
+		if (ops)
+			break;
+	}
+
+	return ops;
+}
+
 void armpmu_free_irq(int irq, int cpu)
 {
 	if (per_cpu(cpu_irq, cpu) == 0)
@@ -551,18 +606,18 @@ void armpmu_free_irq(int irq, int cpu)
 	if (WARN_ON(irq != per_cpu(cpu_irq, cpu)))
 		return;
 
-	if (!irq_is_percpu_devid(irq))
-		free_irq(irq, per_cpu_ptr(&cpu_armpmu, cpu));
-	else if (armpmu_count_irq_users(irq) == 1)
-		free_percpu_irq(irq, &cpu_armpmu);
+	per_cpu(cpu_irq_ops, cpu)->free_pmuirq(irq, cpu, &cpu_armpmu);
 
 	per_cpu(cpu_irq, cpu) = 0;
+	per_cpu(cpu_irq_ops, cpu) = NULL;
 }
 
 int armpmu_request_irq(int irq, int cpu)
 {
 	int err = 0;
 	const irq_handler_t handler = armpmu_dispatch_irq;
+	const struct pmu_irq_ops *irq_ops;
+
 	if (!irq)
 		return 0;
 
@@ -584,15 +639,26 @@ int armpmu_request_irq(int irq, int cpu)
 		irq_set_status_flags(irq, IRQ_NOAUTOEN);
 		err = request_irq(irq, handler, irq_flags, "arm-pmu",
 				  per_cpu_ptr(&cpu_armpmu, cpu));
+
+		irq_ops = &pmuirq_ops;
 	} else if (armpmu_count_irq_users(irq) == 0) {
 		err = request_percpu_irq(irq, handler, "arm-pmu",
 					 &cpu_armpmu);
+
+		irq_ops = &percpu_pmuirq_ops;
+	} else {
+		/* Per cpudevid irq was already requested by another CPU */
+		irq_ops = armpmu_find_irq_ops(irq);
+
+		if (WARN_ON(!irq_ops))
+			err = -EINVAL;
 	}
 
 	if (err)
 		goto err_out;
 
 	per_cpu(cpu_irq, cpu) = irq;
+	per_cpu(cpu_irq_ops, cpu) = irq_ops;
 	return 0;
 
 err_out:
@@ -625,12 +691,8 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
 	per_cpu(cpu_armpmu, cpu) = pmu;
 
 	irq = armpmu_get_cpu_irq(pmu, cpu);
-	if (irq) {
-		if (irq_is_percpu_devid(irq))
-			enable_percpu_irq(irq, IRQ_TYPE_NONE);
-		else
-			enable_irq(irq);
-	}
+	if (irq)
+		per_cpu(cpu_irq_ops, cpu)->enable_pmuirq(irq);
 
 	return 0;
 }
@@ -644,12 +706,8 @@ static int arm_perf_teardown_cpu(unsigned int cpu, struct hlist_node *node)
 		return 0;
 
 	irq = armpmu_get_cpu_irq(pmu, cpu);
-	if (irq) {
-		if (irq_is_percpu_devid(irq))
-			disable_percpu_irq(irq);
-		else
-			disable_irq_nosync(irq);
-	}
+	if (irq)
+		per_cpu(cpu_irq_ops, cpu)->disable_pmuirq(irq);
 
 	per_cpu(cpu_armpmu, cpu) = NULL;
 

From d8f6267f7ce5dc7b8920910e7e75216f77e06d21 Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 24 Sep 2020 12:07:06 +0100
Subject: [PATCH 178/262] arm_pmu: arm64: Use NMIs for PMU

Add required PMU interrupt operations for NMIs. Request interrupt lines as
NMIs when possible, otherwise fall back to normal interrupts.

NMIs are only supported on the arm64 architecture with a GICv3 irqchip.

[Alexandru E.: Added that NMIs only work on arm64 + GICv3, print message
	when PMU is using NMIs]

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Tested-by: Sumit Garg <sumit.garg@linaro.org> (Developerbox)
Cc: Julien Thierry <julien.thierry.kdev@gmail.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20200924110706.254996-8-alexandru.elisei@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu.c | 71 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 8 deletions(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index a770726e98d4d9..cb2f55f450e4ad 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -45,6 +45,17 @@ static const struct pmu_irq_ops pmuirq_ops = {
 	.free_pmuirq = armpmu_free_pmuirq
 };
 
+static void armpmu_free_pmunmi(unsigned int irq, int cpu, void __percpu *devid)
+{
+	free_nmi(irq, per_cpu_ptr(devid, cpu));
+}
+
+static const struct pmu_irq_ops pmunmi_ops = {
+	.enable_pmuirq = enable_nmi,
+	.disable_pmuirq = disable_nmi_nosync,
+	.free_pmuirq = armpmu_free_pmunmi
+};
+
 static void armpmu_enable_percpu_pmuirq(unsigned int irq)
 {
 	enable_percpu_irq(irq, IRQ_TYPE_NONE);
@@ -63,10 +74,37 @@ static const struct pmu_irq_ops percpu_pmuirq_ops = {
 	.free_pmuirq = armpmu_free_percpu_pmuirq
 };
 
+static void armpmu_enable_percpu_pmunmi(unsigned int irq)
+{
+	if (!prepare_percpu_nmi(irq))
+		enable_percpu_nmi(irq, IRQ_TYPE_NONE);
+}
+
+static void armpmu_disable_percpu_pmunmi(unsigned int irq)
+{
+	disable_percpu_nmi(irq);
+	teardown_percpu_nmi(irq);
+}
+
+static void armpmu_free_percpu_pmunmi(unsigned int irq, int cpu,
+				      void __percpu *devid)
+{
+	if (armpmu_count_irq_users(irq) == 1)
+		free_percpu_nmi(irq, devid);
+}
+
+static const struct pmu_irq_ops percpu_pmunmi_ops = {
+	.enable_pmuirq = armpmu_enable_percpu_pmunmi,
+	.disable_pmuirq = armpmu_disable_percpu_pmunmi,
+	.free_pmuirq = armpmu_free_percpu_pmunmi
+};
+
 static DEFINE_PER_CPU(struct arm_pmu *, cpu_armpmu);
 static DEFINE_PER_CPU(int, cpu_irq);
 static DEFINE_PER_CPU(const struct pmu_irq_ops *, cpu_irq_ops);
 
+static bool has_nmi;
+
 static inline u64 arm_pmu_event_max_period(struct perf_event *event)
 {
 	if (event->hw.flags & ARMPMU_EVT_64BIT)
@@ -637,15 +675,31 @@ int armpmu_request_irq(int irq, int cpu)
 			    IRQF_NO_THREAD;
 
 		irq_set_status_flags(irq, IRQ_NOAUTOEN);
-		err = request_irq(irq, handler, irq_flags, "arm-pmu",
+
+		err = request_nmi(irq, handler, irq_flags, "arm-pmu",
 				  per_cpu_ptr(&cpu_armpmu, cpu));
 
-		irq_ops = &pmuirq_ops;
+		/* If cannot get an NMI, get a normal interrupt */
+		if (err) {
+			err = request_irq(irq, handler, irq_flags, "arm-pmu",
+					  per_cpu_ptr(&cpu_armpmu, cpu));
+			irq_ops = &pmuirq_ops;
+		} else {
+			has_nmi = true;
+			irq_ops = &pmunmi_ops;
+		}
 	} else if (armpmu_count_irq_users(irq) == 0) {
-		err = request_percpu_irq(irq, handler, "arm-pmu",
-					 &cpu_armpmu);
-
-		irq_ops = &percpu_pmuirq_ops;
+		err = request_percpu_nmi(irq, handler, "arm-pmu", &cpu_armpmu);
+
+		/* If cannot get an NMI, get a normal interrupt */
+		if (err) {
+			err = request_percpu_irq(irq, handler, "arm-pmu",
+						 &cpu_armpmu);
+			irq_ops = &percpu_pmuirq_ops;
+		} else {
+			has_nmi= true;
+			irq_ops = &percpu_pmunmi_ops;
+		}
 	} else {
 		/* Per cpudevid irq was already requested by another CPU */
 		irq_ops = armpmu_find_irq_ops(irq);
@@ -928,8 +982,9 @@ int armpmu_register(struct arm_pmu *pmu)
 	if (!__oprofile_cpu_pmu)
 		__oprofile_cpu_pmu = pmu;
 
-	pr_info("enabled with %s PMU driver, %d counters available\n",
-		pmu->name, pmu->num_events);
+	pr_info("enabled with %s PMU driver, %d counters available%s\n",
+		pmu->name, pmu->num_events,
+		has_nmi ? ", using NMIs" : "");
 
 	return 0;
 

From 5735f515843019257913432a8c36eb558be388db Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:11 +1000
Subject: [PATCH 179/262] firmware: arm_sdei: Remove sdei_is_err()

sdei_is_err() is only called by sdei_to_linux_errno(). The logic of
checking on the error number is common to them. They can be combined
finely.

This removes sdei_is_err() and its logic is combined to the function
sdei_to_linux_errno(). Also, the assignment of @err to zero is also
dropped in invoke_sdei_fn() because it's always overridden afterwards.
This shouldn't cause functional changes.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: James Morse <james.morse@arm.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20200922130423.10173-2-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index b4b9ce97f415e3..2d256b2ed4b497 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -114,26 +114,7 @@ static int sdei_to_linux_errno(unsigned long sdei_err)
 		return -ENOMEM;
 	}
 
-	/* Not an error value ... */
-	return sdei_err;
-}
-
-/*
- * If x0 is any of these values, then the call failed, use sdei_to_linux_errno()
- * to translate.
- */
-static int sdei_is_err(struct arm_smccc_res *res)
-{
-	switch (res->a0) {
-	case SDEI_NOT_SUPPORTED:
-	case SDEI_INVALID_PARAMETERS:
-	case SDEI_DENIED:
-	case SDEI_PENDING:
-	case SDEI_OUT_OF_RESOURCE:
-		return true;
-	}
-
-	return false;
+	return 0;
 }
 
 static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0,
@@ -141,14 +122,13 @@ static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0,
 			  unsigned long arg3, unsigned long arg4,
 			  u64 *result)
 {
-	int err = 0;
+	int err;
 	struct arm_smccc_res res;
 
 	if (sdei_firmware_call) {
 		sdei_firmware_call(function_id, arg0, arg1, arg2, arg3, arg4,
 				   &res);
-		if (sdei_is_err(&res))
-			err = sdei_to_linux_errno(res.a0);
+		err = sdei_to_linux_errno(res.a0);
 	} else {
 		/*
 		 * !sdei_firmware_call means we failed to probe or called

From 119884249fdba34a06df250a4402a6b2f3697a47 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:12 +1000
Subject: [PATCH 180/262] firmware: arm_sdei: Common block for failing path in
 sdei_event_create()

There are multiple calls of kfree(event) in the failing path of
sdei_event_create() to free the SDEI event. It means we need to
call it again when adding more code in the failing path. It's
prone to miss doing that and introduce memory leakage.

This introduces common block for failing path in sdei_event_create()
to resolve the issue. This shouldn't cause functional changes.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20200922130423.10173-3-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 2d256b2ed4b497..a126ab7e3490b6 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -190,33 +190,31 @@ static struct sdei_event *sdei_event_create(u32 event_num,
 	lockdep_assert_held(&sdei_events_lock);
 
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
-	if (!event)
-		return ERR_PTR(-ENOMEM);
+	if (!event) {
+		err = -ENOMEM;
+		goto fail;
+	}
 
 	INIT_LIST_HEAD(&event->list);
 	event->event_num = event_num;
 
 	err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_PRIORITY,
 				      &result);
-	if (err) {
-		kfree(event);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail;
 	event->priority = result;
 
 	err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_TYPE,
 				      &result);
-	if (err) {
-		kfree(event);
-		return ERR_PTR(err);
-	}
+	if (err)
+		goto fail;
 	event->type = result;
 
 	if (event->type == SDEI_EVENT_TYPE_SHARED) {
 		reg = kzalloc(sizeof(*reg), GFP_KERNEL);
 		if (!reg) {
-			kfree(event);
-			return ERR_PTR(-ENOMEM);
+			err = -ENOMEM;
+			goto fail;
 		}
 
 		reg->event_num = event_num;
@@ -231,8 +229,8 @@ static struct sdei_event *sdei_event_create(u32 event_num,
 
 		regs = alloc_percpu(struct sdei_registered_event);
 		if (!regs) {
-			kfree(event);
-			return ERR_PTR(-ENOMEM);
+			err = -ENOMEM;
+			goto fail;
 		}
 
 		for_each_possible_cpu(cpu) {
@@ -252,6 +250,10 @@ static struct sdei_event *sdei_event_create(u32 event_num,
 	spin_unlock(&sdei_list_lock);
 
 	return event;
+
+fail:
+	kfree(event);
+	return ERR_PTR(err);
 }
 
 static void sdei_event_destroy_llocked(struct sdei_event *event)

From 663c0e89c8defd75a9d34ae31dbc315d6cf894bd Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:13 +1000
Subject: [PATCH 181/262] firmware: arm_sdei: Retrieve event number from event
 instance

In sdei_event_create(), the event number is retrieved from the
variable @event_num for the shared event. The event number was
stored in the event instance. So we can fetch it from the event
instance, similar to what we're doing for the private event.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20200922130423.10173-4-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index a126ab7e3490b6..0f49fff20cc76a 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -217,7 +217,7 @@ static struct sdei_event *sdei_event_create(u32 event_num,
 			goto fail;
 		}
 
-		reg->event_num = event_num;
+		reg->event_num = event->event_num;
 		reg->priority = event->priority;
 
 		reg->callback = cb;

From 10fd7c42b7953b3316806388493bff22abd3f78c Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:14 +1000
Subject: [PATCH 182/262] firmware: arm_sdei: Avoid nested statements in
 sdei_init()

In sdei_init(), the nested statements can be avoided by bailing
on error from platform_driver_register() or absent ACPI SDEI table.
With it, the code looks a bit more readable.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20200922130423.10173-5-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 0f49fff20cc76a..1fa4e577b78eec 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1081,17 +1081,18 @@ static bool __init sdei_present_acpi(void)
 
 static int __init sdei_init(void)
 {
-	int ret = platform_driver_register(&sdei_driver);
-
-	if (!ret && sdei_present_acpi()) {
-		struct platform_device *pdev;
-
-		pdev = platform_device_register_simple(sdei_driver.driver.name,
-						       0, NULL, 0);
-		if (IS_ERR(pdev))
-			pr_info("Failed to register ACPI:SDEI platform device %ld\n",
-				PTR_ERR(pdev));
-	}
+	struct platform_device *pdev;
+	int ret;
+
+	ret = platform_driver_register(&sdei_driver);
+	if (ret || !sdei_present_acpi())
+		return ret;
+
+	pdev = platform_device_register_simple(sdei_driver.driver.name,
+					       0, NULL, 0);
+	if (IS_ERR(pdev))
+		pr_info("Failed to register ACPI:SDEI platform device %ld\n",
+			PTR_ERR(pdev));
 
 	return ret;
 }

From 63627cae41e33763ca967bd2bb8cfe5d281bfe64 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:15 +1000
Subject: [PATCH 183/262] firmware: arm_sdei: Unregister driver on error in
 sdei_init()

The SDEI platform device is created from device-tree node or ACPI
(SDEI) table. For the later case, the platform device is created
explicitly by this module. It'd better to unregister the driver on
failure to create the device to keep the symmetry. The driver, owned
by this module, isn't needed if the device isn't existing.

Besides, the errno (@ret) should be updated accordingly in this
case.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20200922130423.10173-6-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 1fa4e577b78eec..e7e9059c395b3e 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1090,9 +1090,12 @@ static int __init sdei_init(void)
 
 	pdev = platform_device_register_simple(sdei_driver.driver.name,
 					       0, NULL, 0);
-	if (IS_ERR(pdev))
-		pr_info("Failed to register ACPI:SDEI platform device %ld\n",
-			PTR_ERR(pdev));
+	if (IS_ERR(pdev)) {
+		ret = PTR_ERR(pdev);
+		platform_driver_unregister(&sdei_driver);
+		pr_info("Failed to register ACPI:SDEI platform device %d\n",
+			ret);
+	}
 
 	return ret;
 }

From bc110fd322816c9684603a55404b51228c0feca8 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:16 +1000
Subject: [PATCH 184/262] firmware: arm_sdei: Remove duplicate check in
 sdei_get_conduit()

The following two checks are duplicate because @acpi_disabled doesn't
depend on CONFIG_ACPI. So the duplicate check (IS_ENABLED(CONFIG_ACPI))
can be dropped. More details is provided to keep the commit log complete:

   * @acpi_disabled is defined in arch/arm64/kernel/acpi.c when
     CONFIG_ACPI is enabled.
   * @acpi_disabled in defined in include/acpi.h when CONFIG_ACPI
     is disabled.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20200922130423.10173-7-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index e7e9059c395b3e..5daa4e20595c47 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -958,7 +958,7 @@ static int sdei_get_conduit(struct platform_device *pdev)
 		}
 
 		pr_warn("invalid \"method\" property: %s\n", method);
-	} else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) {
+	} else if (!acpi_disabled) {
 		if (acpi_psci_use_hvc()) {
 			sdei_firmware_call = &sdei_smccc_hvc;
 			return SMCCC_CONDUIT_HVC;

From 101119a35ca108022391ad5b84d31e203970185c Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:17 +1000
Subject: [PATCH 185/262] firmware: arm_sdei: Remove redundant error message in
 sdei_probe()

This removes the redundant error message in sdei_probe() because
the case can be identified from the errno in next error message.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20200922130423.10173-8-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 5daa4e20595c47..4caeef3e1e0b41 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -982,8 +982,6 @@ static int sdei_probe(struct platform_device *pdev)
 		return 0;
 
 	err = sdei_api_get_version(&ver);
-	if (err == -EOPNOTSUPP)
-		pr_err("advertised but not implemented in platform firmware\n");
 	if (err) {
 		pr_err("Failed to get SDEI version: %d\n", err);
 		sdei_mark_interface_broken();

From 1bbc75518503fe7992c8ec09557b453ec7222dcf Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:18 +1000
Subject: [PATCH 186/262] firmware: arm_sdei: Remove while loop in
 sdei_event_register()

This removes the unnecessary while loop in sdei_event_register()
because of the following two reasons. This shouldn't cause any
functional changes.

   * The while loop is executed for once, meaning it's not needed
     in theory.
   * With the while loop removed, the nested statements can be
     avoid to make the code a bit cleaner.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20200922130423.10173-9-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 52 ++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 4caeef3e1e0b41..6595bd66aa739b 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -590,36 +590,34 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg)
 	WARN_ON(in_nmi());
 
 	mutex_lock(&sdei_events_lock);
-	do {
-		if (sdei_event_find(event_num)) {
-			pr_warn("Event %u already registered\n", event_num);
-			err = -EBUSY;
-			break;
-		}
+	if (sdei_event_find(event_num)) {
+		pr_warn("Event %u already registered\n", event_num);
+		err = -EBUSY;
+		goto unlock;
+	}
 
-		event = sdei_event_create(event_num, cb, arg);
-		if (IS_ERR(event)) {
-			err = PTR_ERR(event);
-			pr_warn("Failed to create event %u: %d\n", event_num,
-				err);
-			break;
-		}
+	event = sdei_event_create(event_num, cb, arg);
+	if (IS_ERR(event)) {
+		err = PTR_ERR(event);
+		pr_warn("Failed to create event %u: %d\n", event_num, err);
+		goto unlock;
+	}
 
-		cpus_read_lock();
-		err = _sdei_event_register(event);
-		if (err) {
-			sdei_event_destroy(event);
-			pr_warn("Failed to register event %u: %d\n", event_num,
-				err);
-		} else {
-			spin_lock(&sdei_list_lock);
-			event->reregister = true;
-			spin_unlock(&sdei_list_lock);
-		}
-		cpus_read_unlock();
-	} while (0);
-	mutex_unlock(&sdei_events_lock);
+	cpus_read_lock();
+	err = _sdei_event_register(event);
+	if (err) {
+		sdei_event_destroy(event);
+		pr_warn("Failed to register event %u: %d\n", event_num, err);
+		goto cpu_unlock;
+	}
 
+	spin_lock(&sdei_list_lock);
+	event->reregister = true;
+	spin_unlock(&sdei_list_lock);
+cpu_unlock:
+	cpus_read_unlock();
+unlock:
+	mutex_unlock(&sdei_events_lock);
 	return err;
 }
 

From b06146b698e6e835253d01901ecf0ab84646591e Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:19 +1000
Subject: [PATCH 187/262] firmware: arm_sdei: Remove while loop in
 sdei_event_unregister()

This removes the unnecessary while loop in sdei_event_unregister()
because of the following two reasons. This shouldn't cause any
functional changes.

   * The while loop is executed for once, meaning it's not needed
     in theory.
   * With the while loop removed, the nested statements can be
     avoid to make the code a bit cleaner.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20200922130423.10173-10-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 6595bd66aa739b..790bff70d16959 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -491,24 +491,23 @@ int sdei_event_unregister(u32 event_num)
 
 	mutex_lock(&sdei_events_lock);
 	event = sdei_event_find(event_num);
-	do {
-		if (!event) {
-			pr_warn("Event %u not registered\n", event_num);
-			err = -ENOENT;
-			break;
-		}
+	if (!event) {
+		pr_warn("Event %u not registered\n", event_num);
+		err = -ENOENT;
+		goto unlock;
+	}
 
-		spin_lock(&sdei_list_lock);
-		event->reregister = false;
-		event->reenable = false;
-		spin_unlock(&sdei_list_lock);
+	spin_lock(&sdei_list_lock);
+	event->reregister = false;
+	event->reenable = false;
+	spin_unlock(&sdei_list_lock);
 
-		err = _sdei_event_unregister(event);
-		if (err)
-			break;
+	err = _sdei_event_unregister(event);
+	if (err)
+		goto unlock;
 
-		sdei_event_destroy(event);
-	} while (0);
+	sdei_event_destroy(event);
+unlock:
 	mutex_unlock(&sdei_events_lock);
 
 	return err;

From a27c04e1de876297b78a6b812049e6fecc6e4d9a Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:20 +1000
Subject: [PATCH 188/262] firmware: arm_sdei: Cleanup on cross call function

This applies cleanup on the cross call functions, no functional
changes are introduced:

   * Wrap the code block of CROSSCALL_INIT inside "do { } while (0)"
     as linux kernel usually does. Otherwise, scripts/checkpatch.pl
     reports warning regarding this.
   * Use smp_call_func_t for @fn argument in sdei_do_cross_call()
     as the function is called on target CPU(s).
   * Remove unnecessary space before @event in sdei_do_cross_call()

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20200922130423.10173-11-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 790bff70d16959..786c1b6d4af363 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -78,11 +78,15 @@ struct sdei_crosscall_args {
 	int first_error;
 };
 
-#define CROSSCALL_INIT(arg, event)	(arg.event = event, \
-					 arg.first_error = 0, \
-					 atomic_set(&arg.errors, 0))
-
-static inline int sdei_do_cross_call(void *fn, struct sdei_event * event)
+#define CROSSCALL_INIT(arg, event)		\
+	do {					\
+		arg.event = event;		\
+		arg.first_error = 0;		\
+		atomic_set(&arg.errors, 0);	\
+	} while (0)
+
+static inline int sdei_do_cross_call(smp_call_func_t fn,
+				     struct sdei_event *event)
 {
 	struct sdei_crosscall_args arg;
 

From f4673625a52c69a12d2a8f71bcf408c685c148ec Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:21 +1000
Subject: [PATCH 189/262] firmware: arm_sdei: Introduce sdei_do_local_call()

During the CPU hotplug, the private events are registered, enabled
or unregistered on the specific CPU. It repeats the same steps:
initializing cross call argument, make function call on local CPU,
check the returned error.

This introduces sdei_do_local_call() to cover the first steps. The
other benefit is to make CROSSCALL_INIT and struct sdei_crosscall_args
are only visible to sdei_do_{cross, local}_call().

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20200922130423.10173-12-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 41 ++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 786c1b6d4af363..cf1a37b40e2410 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -85,6 +85,17 @@ struct sdei_crosscall_args {
 		atomic_set(&arg.errors, 0);	\
 	} while (0)
 
+static inline int sdei_do_local_call(smp_call_func_t fn,
+				     struct sdei_event *event)
+{
+	struct sdei_crosscall_args arg;
+
+	CROSSCALL_INIT(arg, event);
+	fn(&arg);
+
+	return arg.first_error;
+}
+
 static inline int sdei_do_cross_call(smp_call_func_t fn,
 				     struct sdei_event *event)
 {
@@ -677,7 +688,7 @@ static int sdei_reregister_shared(void)
 static int sdei_cpuhp_down(unsigned int cpu)
 {
 	struct sdei_event *event;
-	struct sdei_crosscall_args arg;
+	int err;
 
 	/* un-register private events */
 	spin_lock(&sdei_list_lock);
@@ -685,12 +696,11 @@ static int sdei_cpuhp_down(unsigned int cpu)
 		if (event->type == SDEI_EVENT_TYPE_SHARED)
 			continue;
 
-		CROSSCALL_INIT(arg, event);
-		/* call the cross-call function locally... */
-		_local_event_unregister(&arg);
-		if (arg.first_error)
+		err = sdei_do_local_call(_local_event_unregister, event);
+		if (err) {
 			pr_err("Failed to unregister event %u: %d\n",
-			       event->event_num, arg.first_error);
+			       event->event_num, err);
+		}
 	}
 	spin_unlock(&sdei_list_lock);
 
@@ -700,7 +710,7 @@ static int sdei_cpuhp_down(unsigned int cpu)
 static int sdei_cpuhp_up(unsigned int cpu)
 {
 	struct sdei_event *event;
-	struct sdei_crosscall_args arg;
+	int err;
 
 	/* re-register/enable private events */
 	spin_lock(&sdei_list_lock);
@@ -709,20 +719,19 @@ static int sdei_cpuhp_up(unsigned int cpu)
 			continue;
 
 		if (event->reregister) {
-			CROSSCALL_INIT(arg, event);
-			/* call the cross-call function locally... */
-			_local_event_register(&arg);
-			if (arg.first_error)
+			err = sdei_do_local_call(_local_event_register, event);
+			if (err) {
 				pr_err("Failed to re-register event %u: %d\n",
-				       event->event_num, arg.first_error);
+				       event->event_num, err);
+			}
 		}
 
 		if (event->reenable) {
-			CROSSCALL_INIT(arg, event);
-			_local_event_enable(&arg);
-			if (arg.first_error)
+			err = sdei_do_local_call(_local_event_enable, event);
+			if (err) {
 				pr_err("Failed to re-enable event %u: %d\n",
-				       event->event_num, arg.first_error);
+				       event->event_num, err);
+			}
 		}
 	}
 	spin_unlock(&sdei_list_lock);

From d2fc580d2dcaf92863fbb194d4e70ef1383b92c4 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:22 +1000
Subject: [PATCH 190/262] firmware: arm_sdei: Remove _sdei_event_register()

The function _sdei_event_register() is called by sdei_event_register()
and sdei_device_thaw() as the following functional call chain shows.
_sdei_event_register() covers the shared and private events, but
sdei_device_thaw() only covers the shared events. So the logic to
cover the private events in _sdei_event_register() isn't needed by
sdei_device_thaw().

Similarly, sdei_reregister_event_llocked() covers the shared and
private events in the regard of reenablement. The logic to cover
the private events isn't needed by sdei_device_thaw() either.

   sdei_event_register          sdei_device_thaw
      _sdei_event_register         sdei_reregister_shared
                                      sdei_reregister_event_llocked
                                         _sdei_event_register

This removes _sdei_event_register() and sdei_reregister_event_llocked().
Their logic is moved to sdei_event_register() and sdei_reregister_shared().
This shouldn't cause any logical changes.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20200922130423.10173-13-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 77 ++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 49 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index cf1a37b40e2410..361d142ad2a8ec 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -577,25 +577,6 @@ static void _local_event_register(void *data)
 	sdei_cross_call_return(arg, err);
 }
 
-static int _sdei_event_register(struct sdei_event *event)
-{
-	int err;
-
-	lockdep_assert_held(&sdei_events_lock);
-
-	if (event->type == SDEI_EVENT_TYPE_SHARED)
-		return sdei_api_event_register(event->event_num,
-					       sdei_entry_point,
-					       event->registered,
-					       SDEI_EVENT_REGISTER_RM_ANY, 0);
-
-	err = sdei_do_cross_call(_local_event_register, event);
-	if (err)
-		sdei_do_cross_call(_local_event_unregister, event);
-
-	return err;
-}
-
 int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg)
 {
 	int err;
@@ -618,7 +599,17 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg)
 	}
 
 	cpus_read_lock();
-	err = _sdei_event_register(event);
+	if (event->type == SDEI_EVENT_TYPE_SHARED) {
+		err = sdei_api_event_register(event->event_num,
+					      sdei_entry_point,
+					      event->registered,
+					      SDEI_EVENT_REGISTER_RM_ANY, 0);
+	} else {
+		err = sdei_do_cross_call(_local_event_register, event);
+		if (err)
+			sdei_do_cross_call(_local_event_unregister, event);
+	}
+
 	if (err) {
 		sdei_event_destroy(event);
 		pr_warn("Failed to register event %u: %d\n", event_num, err);
@@ -635,33 +626,6 @@ int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg)
 	return err;
 }
 
-static int sdei_reregister_event_llocked(struct sdei_event *event)
-{
-	int err;
-
-	lockdep_assert_held(&sdei_events_lock);
-	lockdep_assert_held(&sdei_list_lock);
-
-	err = _sdei_event_register(event);
-	if (err) {
-		pr_err("Failed to re-register event %u\n", event->event_num);
-		sdei_event_destroy_llocked(event);
-		return err;
-	}
-
-	if (event->reenable) {
-		if (event->type == SDEI_EVENT_TYPE_SHARED)
-			err = sdei_api_event_enable(event->event_num);
-		else
-			err = sdei_do_cross_call(_local_event_enable, event);
-	}
-
-	if (err)
-		pr_err("Failed to re-enable event %u\n", event->event_num);
-
-	return err;
-}
-
 static int sdei_reregister_shared(void)
 {
 	int err = 0;
@@ -674,9 +638,24 @@ static int sdei_reregister_shared(void)
 			continue;
 
 		if (event->reregister) {
-			err = sdei_reregister_event_llocked(event);
-			if (err)
+			err = sdei_api_event_register(event->event_num,
+					sdei_entry_point, event->registered,
+					SDEI_EVENT_REGISTER_RM_ANY, 0);
+			if (err) {
+				pr_err("Failed to re-register event %u\n",
+				       event->event_num);
+				sdei_event_destroy_llocked(event);
 				break;
+			}
+		}
+
+		if (event->reenable) {
+			err = sdei_api_event_enable(event->event_num);
+			if (err) {
+				pr_err("Failed to re-enable event %u\n",
+				       event->event_num);
+				break;
+			}
 		}
 	}
 	spin_unlock(&sdei_list_lock);

From 4b2b76cbbc8ff5dabc18123d94a1125143aea567 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Tue, 22 Sep 2020 23:04:23 +1000
Subject: [PATCH 191/262] firmware: arm_sdei: Remove _sdei_event_unregister()

_sdei_event_unregister() is called by sdei_event_unregister() and
sdei_device_freeze(). _sdei_event_unregister() covers the shared
and private events, but sdei_device_freeze() only covers the shared
events. So the logic to cover the private events isn't needed by
sdei_device_freeze().

   sdei_event_unregister        sdei_device_freeze
      _sdei_event_unregister       sdei_unregister_shared
                                     _sdei_event_unregister

This removes _sdei_event_unregister(). Its logic is moved to its
callers accordingly. This shouldn't cause any logical changes.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: James Morse <james.morse@arm.com>
Link: https://lore.kernel.org/r/20200922130423.10173-14-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/arm_sdei.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 361d142ad2a8ec..840754dcc6ca44 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -487,16 +487,6 @@ static void _local_event_unregister(void *data)
 	sdei_cross_call_return(arg, err);
 }
 
-static int _sdei_event_unregister(struct sdei_event *event)
-{
-	lockdep_assert_held(&sdei_events_lock);
-
-	if (event->type == SDEI_EVENT_TYPE_SHARED)
-		return sdei_api_event_unregister(event->event_num);
-
-	return sdei_do_cross_call(_local_event_unregister, event);
-}
-
 int sdei_event_unregister(u32 event_num)
 {
 	int err;
@@ -517,7 +507,11 @@ int sdei_event_unregister(u32 event_num)
 	event->reenable = false;
 	spin_unlock(&sdei_list_lock);
 
-	err = _sdei_event_unregister(event);
+	if (event->type == SDEI_EVENT_TYPE_SHARED)
+		err = sdei_api_event_unregister(event->event_num);
+	else
+		err = sdei_do_cross_call(_local_event_unregister, event);
+
 	if (err)
 		goto unlock;
 
@@ -543,7 +537,7 @@ static int sdei_unregister_shared(void)
 		if (event->type != SDEI_EVENT_TYPE_SHARED)
 			continue;
 
-		err = _sdei_event_unregister(event);
+		err = sdei_api_event_unregister(event->event_num);
 		if (err)
 			break;
 	}

From 48118151d8cc7a457f61d91df0c053473d4b31b1 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Fri, 18 Sep 2020 12:18:44 +0200
Subject: [PATCH 192/262] arm64: mm: Pin down ASIDs for sharing mm with devices

To enable address space sharing with the IOMMU, introduce
arm64_mm_context_get() and arm64_mm_context_put(), that pin down a
context and ensure that it will keep its ASID after a rollover. Export
the symbols to let the modular SMMUv3 driver use them.

Pinning is necessary because a device constantly needs a valid ASID,
unlike tasks that only require one when running. Without pinning, we would
need to notify the IOMMU when we're about to use a new ASID for a task,
and it would get complicated when a new task is assigned a shared ASID.
Consider the following scenario with no ASID pinned:

1. Task t1 is running on CPUx with shared ASID (gen=1, asid=1)
2. Task t2 is scheduled on CPUx, gets ASID (1, 2)
3. Task tn is scheduled on CPUy, a rollover occurs, tn gets ASID (2, 1)
   We would now have to immediately generate a new ASID for t1, notify
   the IOMMU, and finally enable task tn. We are holding the lock during
   all that time, since we can't afford having another CPU trigger a
   rollover. The IOMMU issues invalidation commands that can take tens of
   milliseconds.

It gets needlessly complicated. All we wanted to do was schedule task tn,
that has no business with the IOMMU. By letting the IOMMU pin tasks when
needed, we avoid stalling the slow path, and let the pinning fail when
we're out of shareable ASIDs.

After a rollover, the allocator expects at least one ASID to be available
in addition to the reserved ones (one per CPU). So (NR_ASIDS - NR_CPUS -
1) is the maximum number of ASIDs that can be shared with the IOMMU.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/20200918101852.582559-5-jean-philippe@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mmu.h         |   3 +
 arch/arm64/include/asm/mmu_context.h |  11 ++-
 arch/arm64/mm/context.c              | 105 +++++++++++++++++++++++++--
 3 files changed, 112 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index a7a5ecaa2e836b..0fda85b2cc1b24 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -17,11 +17,14 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/refcount.h>
+
 typedef struct {
 	atomic64_t	id;
 #ifdef CONFIG_COMPAT
 	void		*sigpage;
 #endif
+	refcount_t	pinned;
 	void		*vdso;
 	unsigned long	flags;
 } mm_context_t;
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index f2d7537d6f8339..0672236e1aeab1 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -177,7 +177,13 @@ static inline void cpu_replace_ttbr1(pgd_t *pgdp)
 #define destroy_context(mm)		do { } while(0)
 void check_and_switch_context(struct mm_struct *mm);
 
-#define init_new_context(tsk,mm)	({ atomic64_set(&(mm)->context.id, 0); 0; })
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	atomic64_set(&mm->context.id, 0);
+	refcount_set(&mm->context.pinned, 0);
+	return 0;
+}
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
 static inline void update_saved_ttbr0(struct task_struct *tsk,
@@ -248,6 +254,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 void verify_cpu_asid_bits(void);
 void post_ttbr_update_workaround(void);
 
+unsigned long arm64_mm_context_get(struct mm_struct *mm);
+void arm64_mm_context_put(struct mm_struct *mm);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* !__ASM_MMU_CONTEXT_H */
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 9b11c096a04230..001737a8f309b3 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -27,6 +27,10 @@ static DEFINE_PER_CPU(atomic64_t, active_asids);
 static DEFINE_PER_CPU(u64, reserved_asids);
 static cpumask_t tlb_flush_pending;
 
+static unsigned long max_pinned_asids;
+static unsigned long nr_pinned_asids;
+static unsigned long *pinned_asid_map;
+
 #define ASID_MASK		(~GENMASK(asid_bits - 1, 0))
 #define ASID_FIRST_VERSION	(1UL << asid_bits)
 
@@ -72,7 +76,7 @@ void verify_cpu_asid_bits(void)
 	}
 }
 
-static void set_kpti_asid_bits(void)
+static void set_kpti_asid_bits(unsigned long *map)
 {
 	unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long);
 	/*
@@ -81,13 +85,15 @@ static void set_kpti_asid_bits(void)
 	 * is set, then the ASID will map only userspace. Thus
 	 * mark even as reserved for kernel.
 	 */
-	memset(asid_map, 0xaa, len);
+	memset(map, 0xaa, len);
 }
 
 static void set_reserved_asid_bits(void)
 {
-	if (arm64_kernel_unmapped_at_el0())
-		set_kpti_asid_bits();
+	if (pinned_asid_map)
+		bitmap_copy(asid_map, pinned_asid_map, NUM_USER_ASIDS);
+	else if (arm64_kernel_unmapped_at_el0())
+		set_kpti_asid_bits(asid_map);
 	else
 		bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 }
@@ -165,6 +171,14 @@ static u64 new_context(struct mm_struct *mm)
 		if (check_update_reserved_asid(asid, newasid))
 			return newasid;
 
+		/*
+		 * If it is pinned, we can keep using it. Note that reserved
+		 * takes priority, because even if it is also pinned, we need to
+		 * update the generation into the reserved_asids.
+		 */
+		if (refcount_read(&mm->context.pinned))
+			return newasid;
+
 		/*
 		 * We had a valid ASID in a previous life, so try to re-use
 		 * it if possible.
@@ -256,6 +270,71 @@ void check_and_switch_context(struct mm_struct *mm)
 		cpu_switch_mm(mm->pgd, mm);
 }
 
+unsigned long arm64_mm_context_get(struct mm_struct *mm)
+{
+	unsigned long flags;
+	u64 asid;
+
+	if (!pinned_asid_map)
+		return 0;
+
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+
+	asid = atomic64_read(&mm->context.id);
+
+	if (refcount_inc_not_zero(&mm->context.pinned))
+		goto out_unlock;
+
+	if (nr_pinned_asids >= max_pinned_asids) {
+		asid = 0;
+		goto out_unlock;
+	}
+
+	if (!asid_gen_match(asid)) {
+		/*
+		 * We went through one or more rollover since that ASID was
+		 * used. Ensure that it is still valid, or generate a new one.
+		 */
+		asid = new_context(mm);
+		atomic64_set(&mm->context.id, asid);
+	}
+
+	nr_pinned_asids++;
+	__set_bit(asid2idx(asid), pinned_asid_map);
+	refcount_set(&mm->context.pinned, 1);
+
+out_unlock:
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+
+	asid &= ~ASID_MASK;
+
+	/* Set the equivalent of USER_ASID_BIT */
+	if (asid && arm64_kernel_unmapped_at_el0())
+		asid |= 1;
+
+	return asid;
+}
+EXPORT_SYMBOL_GPL(arm64_mm_context_get);
+
+void arm64_mm_context_put(struct mm_struct *mm)
+{
+	unsigned long flags;
+	u64 asid = atomic64_read(&mm->context.id);
+
+	if (!pinned_asid_map)
+		return;
+
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+
+	if (refcount_dec_and_test(&mm->context.pinned)) {
+		__clear_bit(asid2idx(asid), pinned_asid_map);
+		nr_pinned_asids--;
+	}
+
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+}
+EXPORT_SYMBOL_GPL(arm64_mm_context_put);
+
 /* Errata workaround post TTBRx_EL1 update. */
 asmlinkage void post_ttbr_update_workaround(void)
 {
@@ -296,8 +375,11 @@ static int asids_update_limit(void)
 {
 	unsigned long num_available_asids = NUM_USER_ASIDS;
 
-	if (arm64_kernel_unmapped_at_el0())
+	if (arm64_kernel_unmapped_at_el0()) {
 		num_available_asids /= 2;
+		if (pinned_asid_map)
+			set_kpti_asid_bits(pinned_asid_map);
+	}
 	/*
 	 * Expect allocation after rollover to fail if we don't have at least
 	 * one more ASID than CPUs. ASID #0 is reserved for init_mm.
@@ -305,6 +387,13 @@ static int asids_update_limit(void)
 	WARN_ON(num_available_asids - 1 <= num_possible_cpus());
 	pr_info("ASID allocator initialised with %lu entries\n",
 		num_available_asids);
+
+	/*
+	 * There must always be an ASID available after rollover. Ensure that,
+	 * even if all CPUs have a reserved ASID and the maximum number of ASIDs
+	 * are pinned, there still is at least one empty slot in the ASID map.
+	 */
+	max_pinned_asids = num_available_asids - num_possible_cpus() - 2;
 	return 0;
 }
 arch_initcall(asids_update_limit);
@@ -319,13 +408,17 @@ static int asids_init(void)
 		panic("Failed to allocate bitmap for %lu ASIDs\n",
 		      NUM_USER_ASIDS);
 
+	pinned_asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS),
+				  sizeof(*pinned_asid_map), GFP_KERNEL);
+	nr_pinned_asids = 0;
+
 	/*
 	 * We cannot call set_reserved_asid_bits() here because CPU
 	 * caps are not finalized yet, so it is safer to assume KPTI
 	 * and reserve kernel ASID's from beginning.
 	 */
 	if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0))
-		set_kpti_asid_bits();
+		set_kpti_asid_bits(asid_map);
 	return 0;
 }
 early_initcall(asids_init);

From 6f3c4afae9805bda1682e0ad17b8ce56d85c9a83 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Fri, 18 Sep 2020 12:18:46 +0200
Subject: [PATCH 193/262] arm64: cpufeature: Export symbol
 read_sanitised_ftr_reg()

The SMMUv3 driver would like to read the MMFR0 PARANGE field in order to
share CPU page tables with devices. Allow the driver to be built as
module by exporting the read_sanitized_ftr_reg() cpufeature symbol.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20200918101852.582559-7-jean-philippe@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6424584be01e6d..07f10ad8855c46 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1111,6 +1111,7 @@ u64 read_sanitised_ftr_reg(u32 id)
 		return 0;
 	return regp->sys_val;
 }
+EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg);
 
 #define read_sysreg_case(r)	\
 	case r:		return read_sysreg_s(r)

From b5756146db3ad57a9c0e841ea01ce915db27b7de Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 28 Sep 2020 23:02:19 +0100
Subject: [PATCH 194/262] arm64: mte: Fix typo in memory tagging ABI
 documentation

We offer both PTRACE_PEEKMTETAGS and PTRACE_POKEMTETAGS requests via
ptrace().

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arm64/memory-tagging-extension.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index e3709b536b89b4..034d37c605e840 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -142,7 +142,7 @@ the tags from or set the tags to a tracee's address space. The
 ``ptrace()`` system call is invoked as ``ptrace(request, pid, addr,
 data)`` where:
 
-- ``request`` - one of ``PTRACE_PEEKMTETAGS`` or ``PTRACE_PEEKMTETAGS``.
+- ``request`` - one of ``PTRACE_PEEKMTETAGS`` or ``PTRACE_POKEMTETAGS``.
 - ``pid`` - the tracee's PID.
 - ``addr`` - address in the tracee's address space.
 - ``data`` - pointer to a ``struct iovec`` where ``iov_base`` points to

From 39533e12063be7f55e3d6ae21ffe067799d542a4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 16 Jul 2020 17:11:10 +0100
Subject: [PATCH 195/262] arm64: Run ARCH_WORKAROUND_2 enabling code on all
 CPUs

Commit 606f8e7b27bf ("arm64: capabilities: Use linear array for
detection and verification") changed the way we deal with per-CPU errata
by only calling the .matches() callback until one CPU is found to be
affected. At this point, .matches() stop being called, and .cpu_enable()
will be called on all CPUs.

This breaks the ARCH_WORKAROUND_2 handling, as only a single CPU will be
mitigated.

In order to address this, forcefully call the .matches() callback from a
.cpu_enable() callback, which brings us back to the original behaviour.

Fixes: 606f8e7b27bf ("arm64: capabilities: Use linear array for detection and verification")
Cc: <stable@vger.kernel.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpu_errata.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 3fe64bf5a58d40..abfef5f3b5fd4c 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -457,6 +457,12 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
 	return required;
 }
 
+static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap)
+{
+	if (ssbd_state != ARM64_SSBD_FORCE_DISABLE)
+		cap->matches(cap, SCOPE_LOCAL_CPU);
+}
+
 /* known invulnerable cores */
 static const struct midr_range arm64_ssb_cpus[] = {
 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
@@ -914,6 +920,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.capability = ARM64_SSBD,
 		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
 		.matches = has_ssbd_mitigation,
+		.cpu_enable = cpu_enable_ssbd_mitigation,
 		.midr_range_list = arm64_ssb_cpus,
 	},
 #ifdef CONFIG_ARM64_ERRATUM_1418040

From 6e5f0927846adf39aebee450f13871e3cb4ab012 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Sep 2020 22:11:13 +0100
Subject: [PATCH 196/262] arm64: Remove Spectre-related CONFIG_* options

The spectre mitigations are too configurable for their own good, leading
to confusing logic trying to figure out when we should mitigate and when
we shouldn't. Although the plethora of command-line options need to stick
around for backwards compatibility, the default-on CONFIG options that
depend on EXPERT can be dropped, as the mitigations only do anything if
the system is vulnerable, a mitigation is available and the command-line
hasn't disabled it.

Remove CONFIG_HARDEN_BRANCH_PREDICTOR and CONFIG_ARM64_SSBD in favour of
enabling this code unconditionally.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig                      | 26 -------------------------
 arch/arm64/include/asm/cpufeature.h     |  4 ----
 arch/arm64/include/asm/kvm_mmu.h        |  7 -------
 arch/arm64/include/asm/mmu.h            |  9 ---------
 arch/arm64/kernel/Makefile              |  3 +--
 arch/arm64/kernel/cpu_errata.c          | 19 ++----------------
 arch/arm64/kernel/cpufeature.c          |  4 ----
 arch/arm64/kernel/entry.S               |  4 ----
 arch/arm64/kvm/Kconfig                  |  2 +-
 arch/arm64/kvm/hyp/hyp-entry.S          |  2 --
 arch/arm64/kvm/hyp/include/hyp/switch.h |  4 ----
 11 files changed, 4 insertions(+), 80 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6d232837cbeee8..51259274a819aa 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1165,32 +1165,6 @@ config UNMAP_KERNEL_AT_EL0
 
 	  If unsure, say Y.
 
-config HARDEN_BRANCH_PREDICTOR
-	bool "Harden the branch predictor against aliasing attacks" if EXPERT
-	default y
-	help
-	  Speculation attacks against some high-performance processors rely on
-	  being able to manipulate the branch predictor for a victim context by
-	  executing aliasing branches in the attacker context.  Such attacks
-	  can be partially mitigated against by clearing internal branch
-	  predictor state and limiting the prediction logic in some situations.
-
-	  This config option will take CPU-specific actions to harden the
-	  branch predictor against aliasing attacks and may rely on specific
-	  instruction sequences or control bits being set by the system
-	  firmware.
-
-	  If unsure, say Y.
-
-config ARM64_SSBD
-	bool "Speculative Store Bypass Disable" if EXPERT
-	default y
-	help
-	  This enables mitigation of the bypassing of previous stores
-	  by speculative loads.
-
-	  If unsure, say Y.
-
 config RODATA_FULL_DEFAULT_ENABLED
 	bool "Apply r/o permissions of VM areas also to their linear aliases"
 	default y
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 89b4f0142c2878..851d144527edae 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -712,12 +712,8 @@ int get_spectre_v2_workaround_state(void);
 
 static inline int arm64_get_ssbd_state(void)
 {
-#ifdef CONFIG_ARM64_SSBD
 	extern int ssbd_state;
 	return ssbd_state;
-#else
-	return ARM64_SSBD_UNKNOWN;
-#endif
 }
 
 void arm64_set_ssbd_mitigation(bool state);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 189839c3706ac5..1df85a3ddb9b34 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -527,7 +527,6 @@ static inline int kvm_map_vectors(void)
 }
 #endif
 
-#ifdef CONFIG_ARM64_SSBD
 DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
 
 static inline int hyp_map_aux_data(void)
@@ -544,12 +543,6 @@ static inline int hyp_map_aux_data(void)
 	}
 	return 0;
 }
-#else
-static inline int hyp_map_aux_data(void)
-{
-	return 0;
-}
-#endif
 
 #define kvm_phys_to_vttbr(addr)		phys_to_ttbr(addr)
 
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index a7a5ecaa2e836b..f5e3efeb5b9749 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -45,7 +45,6 @@ struct bp_hardening_data {
 	bp_hardening_cb_t	fn;
 };
 
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
 
 static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
@@ -64,14 +63,6 @@ static inline void arm64_apply_bp_hardening(void)
 	if (d->fn)
 		d->fn();
 }
-#else
-static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
-{
-	return NULL;
-}
-
-static inline void arm64_apply_bp_hardening(void)	{ }
-#endif	/* CONFIG_HARDEN_BRANCH_PREDICTOR */
 
 extern void arm64_memblock_init(void);
 extern void paging_init(void);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index a561cbb91d4dc5..ed8799bdd41f27 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -19,7 +19,7 @@ obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   return_address.o cpuinfo.o cpu_errata.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
-			   syscall.o
+			   ssbd.o syscall.o
 
 targets			+= efi-entry.o
 
@@ -59,7 +59,6 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
 obj-$(CONFIG_CRASH_CORE)		+= crash_core.o
 obj-$(CONFIG_ARM_SDE_INTERFACE)		+= sdei.o
-obj-$(CONFIG_ARM64_SSBD)		+= ssbd.o
 obj-$(CONFIG_ARM64_PTR_AUTH)		+= pointer_auth.o
 obj-$(CONFIG_SHADOW_CALL_STACK)		+= scs.o
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index abfef5f3b5fd4c..dd9103915f1ec6 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -254,9 +254,7 @@ static int detect_harden_bp_fw(void)
 	    ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
 		cb = qcom_link_stack_sanitization;
 
-	if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR))
-		install_bp_hardening_cb(cb, smccc_start, smccc_end);
-
+	install_bp_hardening_cb(cb, smccc_start, smccc_end);
 	return 1;
 }
 
@@ -335,11 +333,6 @@ void arm64_set_ssbd_mitigation(bool state)
 {
 	int conduit;
 
-	if (!IS_ENABLED(CONFIG_ARM64_SSBD)) {
-		pr_info_once("SSBD disabled by kernel configuration\n");
-		return;
-	}
-
 	if (this_cpu_has_cap(ARM64_SSBS)) {
 		if (state)
 			asm volatile(SET_PSTATE_SSBS(0));
@@ -584,12 +577,6 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
 
 	__spectrev2_safe = false;
 
-	if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) {
-		pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n");
-		__hardenbp_enab = false;
-		return false;
-	}
-
 	/* forced off */
 	if (__nospectre_v2 || cpu_mitigations_off()) {
 		pr_info_once("spectrev2 mitigation disabled by command line option\n");
@@ -1004,9 +991,7 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev,
 	switch (ssbd_state) {
 	case ARM64_SSBD_KERNEL:
 	case ARM64_SSBD_FORCE_ENABLE:
-		if (IS_ENABLED(CONFIG_ARM64_SSBD))
-			return sprintf(buf,
-			    "Mitigation: Speculative Store Bypass disabled via prctl\n");
+		return sprintf(buf, "Mitigation: Speculative Store Bypass disabled via prctl\n");
 	}
 
 	return sprintf(buf, "Vulnerable\n");
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6424584be01e6d..4bb45b1d4ae4af 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1583,7 +1583,6 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
 	WARN_ON(val & (7 << 27 | 7 << 21));
 }
 
-#ifdef CONFIG_ARM64_SSBD
 static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr)
 {
 	if (user_mode(regs))
@@ -1623,7 +1622,6 @@ static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused)
 		arm64_set_ssbd_mitigation(true);
 	}
 }
-#endif /* CONFIG_ARM64_SSBD */
 
 #ifdef CONFIG_ARM64_PAN
 static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
@@ -1976,7 +1974,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_pos = ID_AA64ISAR0_CRC32_SHIFT,
 		.min_field_value = 1,
 	},
-#ifdef CONFIG_ARM64_SSBD
 	{
 		.desc = "Speculative Store Bypassing Safe (SSBS)",
 		.capability = ARM64_SSBS,
@@ -1988,7 +1985,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY,
 		.cpu_enable = cpu_enable_ssbs,
 	},
-#endif
 #ifdef CONFIG_ARM64_CNP
 	{
 		.desc = "Common not Private translations",
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 55af8b504b65ab..81b709349d7b46 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -132,7 +132,6 @@ alternative_else_nop_endif
 	 * them if required.
 	 */
 	.macro	apply_ssbd, state, tmp1, tmp2
-#ifdef CONFIG_ARM64_SSBD
 alternative_cb	arm64_enable_wa2_handling
 	b	.L__asm_ssbd_skip\@
 alternative_cb_end
@@ -146,7 +145,6 @@ alternative_cb	arm64_update_smccc_conduit
 	nop					// Patched to SMC/HVC #0
 alternative_cb_end
 .L__asm_ssbd_skip\@:
-#endif
 	.endm
 
 	.macro	kernel_entry, el, regsize = 64
@@ -697,11 +695,9 @@ el0_irq_naked:
 	bl	trace_hardirqs_off
 #endif
 
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 	tbz	x22, #55, 1f
 	bl	do_el0_irq_bp_hardening
 1:
-#endif
 	irq_handler
 
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 318c8f2df2452e..42e5895763b3a8 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -58,7 +58,7 @@ config KVM_ARM_PMU
 	  virtual machines.
 
 config KVM_INDIRECT_VECTORS
-	def_bool HARDEN_BRANCH_PREDICTOR || RANDOMIZE_BASE
+	def_bool RANDOMIZE_BASE
 
 endif # KVM
 
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 46b4dab933d0e7..41698bae5d5dde 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -116,7 +116,6 @@ el1_hvc_guest:
 			  ARM_SMCCC_ARCH_WORKAROUND_2)
 	cbnz	w1, el1_trap
 
-#ifdef CONFIG_ARM64_SSBD
 alternative_cb	arm64_enable_wa2_handling
 	b	wa2_end
 alternative_cb_end
@@ -143,7 +142,6 @@ alternative_cb_end
 wa2_end:
 	mov	x2, xzr
 	mov	x1, xzr
-#endif
 
 wa_epilogue:
 	mov	x0, xzr
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 5b6b8fa00f0af0..b503f19c37c537 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -489,7 +489,6 @@ static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu)
 
 static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
 {
-#ifdef CONFIG_ARM64_SSBD
 	/*
 	 * The host runs with the workaround always present. If the
 	 * guest wants it disabled, so be it...
@@ -497,19 +496,16 @@ static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
 	if (__needs_ssbd_off(vcpu) &&
 	    __hyp_this_cpu_read(arm64_ssbd_callback_required))
 		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
-#endif
 }
 
 static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
 {
-#ifdef CONFIG_ARM64_SSBD
 	/*
 	 * If the guest has disabled the workaround, bring it back on.
 	 */
 	if (__needs_ssbd_off(vcpu) &&
 	    __hyp_this_cpu_read(arm64_ssbd_callback_required))
 		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
-#endif
 }
 
 static inline void __kvm_unexpected_el2_exception(void)

From 5359a87d5bdacb0c27f282f40ccbd80ce8f9702a Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Sep 2020 22:16:10 +0100
Subject: [PATCH 197/262] KVM: arm64: Replace CONFIG_KVM_INDIRECT_VECTORS with
 CONFIG_RANDOMIZE_BASE

The removal of CONFIG_HARDEN_BRANCH_PREDICTOR means that
CONFIG_KVM_INDIRECT_VECTORS is synonymous with CONFIG_RANDOMIZE_BASE,
so replace it.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_asm.h | 2 +-
 arch/arm64/include/asm/kvm_mmu.h | 2 +-
 arch/arm64/kernel/cpu_errata.c   | 4 ++--
 arch/arm64/kvm/Kconfig           | 3 ---
 arch/arm64/kvm/hyp/Makefile      | 2 +-
 arch/arm64/kvm/hyp/hyp-entry.S   | 2 +-
 6 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 6f98fbd0ac8162..e9378cc8049d0d 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -102,7 +102,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
 #define __kvm_hyp_init		CHOOSE_NVHE_SYM(__kvm_hyp_init)
 #define __kvm_hyp_vector	CHOOSE_HYP_SYM(__kvm_hyp_vector)
 
-#ifdef CONFIG_KVM_INDIRECT_VECTORS
+#ifdef CONFIG_RANDOMIZE_BASE
 extern atomic_t arm64_el2_vector_last_slot;
 DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
 #define __bp_harden_hyp_vecs	CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 1df85a3ddb9b34..dfd176b0642e12 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -430,7 +430,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
 	return ret;
 }
 
-#ifdef CONFIG_KVM_INDIRECT_VECTORS
+#ifdef CONFIG_RANDOMIZE_BASE
 /*
  * EL2 vectors can be mapped and rerouted in a number of ways,
  * depending on the kernel configuration and CPU present:
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index dd9103915f1ec6..135bf7f92d4a1b 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -113,7 +113,7 @@ atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
 
 DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
 
-#ifdef CONFIG_KVM_INDIRECT_VECTORS
+#ifdef CONFIG_RANDOMIZE_BASE
 static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
 				const char *hyp_vecs_end)
 {
@@ -167,7 +167,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
 {
 	__this_cpu_write(bp_hardening_data.fn, fn);
 }
-#endif	/* CONFIG_KVM_INDIRECT_VECTORS */
+#endif	/* CONFIG_RANDOMIZE_BASE */
 
 #include <linux/arm-smccc.h>
 
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 42e5895763b3a8..043756db8f6ec2 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -57,9 +57,6 @@ config KVM_ARM_PMU
 	  Adds support for a virtual Performance Monitoring Unit (PMU) in
 	  virtual machines.
 
-config KVM_INDIRECT_VECTORS
-	def_bool RANDOMIZE_BASE
-
 endif # KVM
 
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index f54f0e89a71cb7..89d2bf73acb518 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -11,4 +11,4 @@ subdir-ccflags-y := -I$(incdir)				\
 		    $(DISABLE_STACKLEAK_PLUGIN)
 
 obj-$(CONFIG_KVM) += vhe/ nvhe/
-obj-$(CONFIG_KVM_INDIRECT_VECTORS) += smccc_wa.o
+obj-$(CONFIG_RANDOMIZE_BASE) += smccc_wa.o
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 41698bae5d5dde..db2dd750061731 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -286,7 +286,7 @@ SYM_CODE_START(__kvm_hyp_vector)
 	valid_vect	el1_error		// Error 32-bit EL1
 SYM_CODE_END(__kvm_hyp_vector)
 
-#ifdef CONFIG_KVM_INDIRECT_VECTORS
+#ifdef CONFIG_RANDOMIZE_BASE
 .macro hyp_ventry
 	.align 7
 1:	esb

From b181048f414668933c524fe077b9aa6e8e9a42b4 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Sep 2020 22:42:07 +0100
Subject: [PATCH 198/262] KVM: arm64: Simplify install_bp_hardening_cb()

Use is_hyp_mode_available() to detect whether or not we need to patch
the KVM vectors for branch hardening, which avoids the need to take the
vector pointers as parameters.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpu_errata.c | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 135bf7f92d4a1b..a72ca57f563000 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -126,18 +126,19 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
 	__flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
 }
 
-static void install_bp_hardening_cb(bp_hardening_cb_t fn,
-				    const char *hyp_vecs_start,
-				    const char *hyp_vecs_end)
+static void install_bp_hardening_cb(bp_hardening_cb_t fn)
 {
 	static DEFINE_RAW_SPINLOCK(bp_lock);
 	int cpu, slot = -1;
+	const char *hyp_vecs_start = __smccc_workaround_1_smc;
+	const char *hyp_vecs_end = __smccc_workaround_1_smc +
+				   __SMCCC_WORKAROUND_1_SMC_SZ;
 
 	/*
 	 * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if
 	 * we're a guest. Skip the hyp-vectors work.
 	 */
-	if (!hyp_vecs_start) {
+	if (!is_hyp_mode_available()) {
 		__this_cpu_write(bp_hardening_data.fn, fn);
 		return;
 	}
@@ -161,9 +162,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
 	raw_spin_unlock(&bp_lock);
 }
 #else
-static void install_bp_hardening_cb(bp_hardening_cb_t fn,
-				      const char *hyp_vecs_start,
-				      const char *hyp_vecs_end)
+static void install_bp_hardening_cb(bp_hardening_cb_t fn)
 {
 	__this_cpu_write(bp_hardening_data.fn, fn);
 }
@@ -209,7 +208,6 @@ early_param("nospectre_v2", parse_nospectre_v2);
 static int detect_harden_bp_fw(void)
 {
 	bp_hardening_cb_t cb;
-	void *smccc_start, *smccc_end;
 	struct arm_smccc_res res;
 	u32 midr = read_cpuid_id();
 
@@ -229,21 +227,10 @@ static int detect_harden_bp_fw(void)
 	switch (arm_smccc_1_1_get_conduit()) {
 	case SMCCC_CONDUIT_HVC:
 		cb = call_hvc_arch_workaround_1;
-		/* This is a guest, no need to patch KVM vectors */
-		smccc_start = NULL;
-		smccc_end = NULL;
 		break;
 
 	case SMCCC_CONDUIT_SMC:
 		cb = call_smc_arch_workaround_1;
-#if IS_ENABLED(CONFIG_KVM)
-		smccc_start = __smccc_workaround_1_smc;
-		smccc_end = __smccc_workaround_1_smc +
-			__SMCCC_WORKAROUND_1_SMC_SZ;
-#else
-		smccc_start = NULL;
-		smccc_end = NULL;
-#endif
 		break;
 
 	default:
@@ -254,7 +241,7 @@ static int detect_harden_bp_fw(void)
 	    ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
 		cb = qcom_link_stack_sanitization;
 
-	install_bp_hardening_cb(cb, smccc_start, smccc_end);
+	install_bp_hardening_cb(cb);
 	return 1;
 }
 

From 688f1e4b6d8f792c44bec3a448664fd9e8949964 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Sep 2020 23:00:31 +0100
Subject: [PATCH 199/262] arm64: Rename ARM64_HARDEN_BRANCH_PREDICTOR to
 ARM64_SPECTRE_V2

For better or worse, the world knows about "Spectre" and not about
"Branch predictor hardening". Rename ARM64_HARDEN_BRANCH_PREDICTOR to
ARM64_SPECTRE_V2 as part of moving all of the Spectre mitigations into
their own little corner.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpucaps.h |  2 +-
 arch/arm64/include/asm/kvm_mmu.h | 27 +++++++++++++--------------
 arch/arm64/include/asm/mmu.h     |  2 +-
 arch/arm64/kernel/cpu_errata.c   |  2 +-
 4 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 07b643a7071005..348bfcf6c818db 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -31,7 +31,7 @@
 #define ARM64_HAS_DCPOP				21
 #define ARM64_SVE				22
 #define ARM64_UNMAP_KERNEL_AT_EL0		23
-#define ARM64_HARDEN_BRANCH_PREDICTOR		24
+#define ARM64_SPECTRE_V2			24
 #define ARM64_HAS_RAS_EXTN			25
 #define ARM64_WORKAROUND_843419			26
 #define ARM64_HAS_CACHE_IDC			27
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index dfd176b0642e12..873e12430ac73c 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -435,14 +435,13 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
  * EL2 vectors can be mapped and rerouted in a number of ways,
  * depending on the kernel configuration and CPU present:
  *
- * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the
- *   hardening sequence is placed in one of the vector slots, which is
- *   executed before jumping to the real vectors.
+ * - If the CPU is affected by Spectre-v2, the hardening sequence is
+ *   placed in one of the vector slots, which is executed before jumping
+ *   to the real vectors.
  *
- * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the
- *   ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the
- *   hardening sequence is mapped next to the idmap page, and executed
- *   before jumping to the real vectors.
+ * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot
+ *   containing the hardening sequence is mapped next to the idmap page,
+ *   and executed before jumping to the real vectors.
  *
  * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an
  *   empty slot is selected, mapped next to the idmap page, and
@@ -464,7 +463,7 @@ static inline void *kvm_get_hyp_vector(void)
 	void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
 	int slot = -1;
 
-	if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) {
+	if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) {
 		vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
 		slot = data->hyp_vectors_slot;
 	}
@@ -485,15 +484,15 @@ static inline void *kvm_get_hyp_vector(void)
 static inline int kvm_map_vectors(void)
 {
 	/*
-	 * HBP  = ARM64_HARDEN_BRANCH_PREDICTOR
+	 * SV2  = ARM64_SPECTRE_V2
 	 * HEL2 = ARM64_HARDEN_EL2_VECTORS
 	 *
-	 * !HBP + !HEL2 -> use direct vectors
-	 *  HBP + !HEL2 -> use hardened vectors in place
-	 * !HBP +  HEL2 -> allocate one vector slot and use exec mapping
-	 *  HBP +  HEL2 -> use hardened vertors and use exec mapping
+	 * !SV2 + !HEL2 -> use direct vectors
+	 *  SV2 + !HEL2 -> use hardened vectors in place
+	 * !SV2 +  HEL2 -> allocate one vector slot and use exec mapping
+	 *  SV2 +  HEL2 -> use hardened vertors and use exec mapping
 	 */
-	if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) {
+	if (cpus_have_const_cap(ARM64_SPECTRE_V2)) {
 		__kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs);
 		__kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
 	}
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index f5e3efeb5b9749..cbff2d42c1d8ea 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -56,7 +56,7 @@ static inline void arm64_apply_bp_hardening(void)
 {
 	struct bp_hardening_data *d;
 
-	if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR))
+	if (!cpus_have_const_cap(ARM64_SPECTRE_V2))
 		return;
 
 	d = arm64_get_bp_hardening_data();
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a72ca57f563000..b275f2d5e7a3eb 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -877,7 +877,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 #endif
 	{
 		.desc = "Branch predictor hardening",
-		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+		.capability = ARM64_SPECTRE_V2,
 		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
 		.matches = check_branch_predictor,
 		.cpu_enable = cpu_enable_branch_predictor_hardening,

From 455697adefdb8604cd10413da37c60014aecbbb7 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Sep 2020 23:10:49 +0100
Subject: [PATCH 200/262] arm64: Introduce separate file for spectre
 mitigations and reporting

The spectre mitigation code is spread over a few different files, which
makes it both hard to follow, but also hard to remove it should we want
to do that in future.

Introduce a new file for housing the spectre mitigations, and populate
it with the spectre-v1 reporting code to start with.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/Makefile      |  2 +-
 arch/arm64/kernel/cpu_errata.c  |  6 ------
 arch/arm64/kernel/proton-pack.c | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 7 deletions(-)
 create mode 100644 arch/arm64/kernel/proton-pack.c

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index ed8799bdd41f27..15b0fcbb688368 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -19,7 +19,7 @@ obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   return_address.o cpuinfo.o cpu_errata.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
-			   ssbd.o syscall.o
+			   ssbd.o syscall.o proton-pack.o
 
 targets			+= efi-entry.o
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index b275f2d5e7a3eb..5eb9a9126dc446 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -949,12 +949,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	}
 };
 
-ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
-}
-
 ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
new file mode 100644
index 00000000000000..c37bf468e4a4d7
--- /dev/null
+++ b/arch/arm64/kernel/proton-pack.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Handle detection, reporting and mitigation of Spectre v1, v2 and v4, as
+ * detailed at:
+ *
+ *   https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability
+ *
+ * This code was originally written hastily under an awful lot of stress and so
+ * aspects of it are somewhat hacky. Unfortunately, changing anything in here
+ * instantly makes me feel ill. Thanks, Jann. Thann.
+ *
+ * Copyright (C) 2018 ARM Ltd, All Rights Reserved.
+ * Copyright (C) 2020 Google LLC
+ *
+ * "If there's something strange in your neighbourhood, who you gonna call?"
+ *
+ * Authors: Will Deacon <will@kernel.org> and Marc Zyngier <maz@kernel.org>
+ */
+
+#include <linux/device.h>
+
+/*
+ * Spectre v1.
+ *
+ * The kernel can't protect userspace for this one: it's each person for
+ * themselves. Advertise what we're doing and be done with it.
+ */
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}

From d4647f0a2ad711101067cba69c34716758aa1e48 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Sep 2020 23:30:17 +0100
Subject: [PATCH 201/262] arm64: Rewrite Spectre-v2 mitigation code

The Spectre-v2 mitigation code is pretty unwieldy and hard to maintain.
This is largely due to it being written hastily, without much clue as to
how things would pan out, and also because it ends up mixing policy and
state in such a way that it is very difficult to figure out what's going
on.

Rewrite the Spectre-v2 mitigation so that it clearly separates state from
policy and follows a more structured approach to handling the mitigation.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpufeature.h |   6 -
 arch/arm64/include/asm/kvm_host.h   |  17 --
 arch/arm64/include/asm/processor.h  |   1 +
 arch/arm64/include/asm/spectre.h    |  27 +++
 arch/arm64/kernel/cpu_errata.c      | 236 +----------------------
 arch/arm64/kernel/proton-pack.c     | 288 ++++++++++++++++++++++++++++
 arch/arm64/kvm/hypercalls.c         |   8 +-
 arch/arm64/kvm/psci.c               |   8 +-
 8 files changed, 327 insertions(+), 264 deletions(-)
 create mode 100644 arch/arm64/include/asm/spectre.h

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 851d144527edae..3b48aa121cee81 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -698,12 +698,6 @@ static inline bool system_supports_tlb_range(void)
 		cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
 }
 
-#define ARM64_BP_HARDEN_UNKNOWN		-1
-#define ARM64_BP_HARDEN_WA_NEEDED	0
-#define ARM64_BP_HARDEN_NOT_REQUIRED	1
-
-int get_spectre_v2_workaround_state(void);
-
 #define ARM64_SSBD_UNKNOWN		-1
 #define ARM64_SSBD_FORCE_DISABLE	0
 #define ARM64_SSBD_KERNEL		1
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index e52c927aade538..59f6cff0f0cb0d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -631,23 +631,6 @@ static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
 static inline void kvm_clr_pmu_events(u32 clr) {}
 #endif
 
-#define KVM_BP_HARDEN_UNKNOWN		-1
-#define KVM_BP_HARDEN_WA_NEEDED		0
-#define KVM_BP_HARDEN_NOT_REQUIRED	1
-
-static inline int kvm_arm_harden_branch_predictor(void)
-{
-	switch (get_spectre_v2_workaround_state()) {
-	case ARM64_BP_HARDEN_WA_NEEDED:
-		return KVM_BP_HARDEN_WA_NEEDED;
-	case ARM64_BP_HARDEN_NOT_REQUIRED:
-		return KVM_BP_HARDEN_NOT_REQUIRED;
-	case ARM64_BP_HARDEN_UNKNOWN:
-	default:
-		return KVM_BP_HARDEN_UNKNOWN;
-	}
-}
-
 #define KVM_SSBD_UNKNOWN		-1
 #define KVM_SSBD_FORCE_DISABLE		0
 #define KVM_SSBD_KERNEL		1
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 240fe5e5b72097..436ab1549ec672 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -38,6 +38,7 @@
 #include <asm/pgtable-hwdef.h>
 #include <asm/pointer_auth.h>
 #include <asm/ptrace.h>
+#include <asm/spectre.h>
 #include <asm/types.h>
 
 /*
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
new file mode 100644
index 00000000000000..b776abe28dffff
--- /dev/null
+++ b/arch/arm64/include/asm/spectre.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Interface for managing mitigations for Spectre vulnerabilities.
+ *
+ * Copyright (C) 2020 Google LLC
+ * Author: Will Deacon <will@kernel.org>
+ */
+
+#ifndef __ASM_SPECTRE_H
+#define __ASM_SPECTRE_H
+
+#include <asm/cpufeature.h>
+
+/* Watch out, ordering is important here. */
+enum mitigation_state {
+	SPECTRE_UNAFFECTED,
+	SPECTRE_MITIGATED,
+	SPECTRE_VULNERABLE,
+};
+
+struct task_struct;
+
+enum mitigation_state arm64_get_spectre_v2_state(void);
+bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope);
+void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+
+#endif	/* __ASM_SPECTRE_H */
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 5eb9a9126dc446..4103391f51b5b5 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -106,145 +106,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
 		sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
 }
 
-atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
-
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-
-DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
-
-#ifdef CONFIG_RANDOMIZE_BASE
-static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
-				const char *hyp_vecs_end)
-{
-	void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K);
-	int i;
-
-	for (i = 0; i < SZ_2K; i += 0x80)
-		memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
-
-	__flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
-}
-
-static void install_bp_hardening_cb(bp_hardening_cb_t fn)
-{
-	static DEFINE_RAW_SPINLOCK(bp_lock);
-	int cpu, slot = -1;
-	const char *hyp_vecs_start = __smccc_workaround_1_smc;
-	const char *hyp_vecs_end = __smccc_workaround_1_smc +
-				   __SMCCC_WORKAROUND_1_SMC_SZ;
-
-	/*
-	 * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if
-	 * we're a guest. Skip the hyp-vectors work.
-	 */
-	if (!is_hyp_mode_available()) {
-		__this_cpu_write(bp_hardening_data.fn, fn);
-		return;
-	}
-
-	raw_spin_lock(&bp_lock);
-	for_each_possible_cpu(cpu) {
-		if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
-			slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
-			break;
-		}
-	}
-
-	if (slot == -1) {
-		slot = atomic_inc_return(&arm64_el2_vector_last_slot);
-		BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
-		__copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
-	}
-
-	__this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
-	__this_cpu_write(bp_hardening_data.fn, fn);
-	raw_spin_unlock(&bp_lock);
-}
-#else
-static void install_bp_hardening_cb(bp_hardening_cb_t fn)
-{
-	__this_cpu_write(bp_hardening_data.fn, fn);
-}
-#endif	/* CONFIG_RANDOMIZE_BASE */
-
-#include <linux/arm-smccc.h>
-
-static void __maybe_unused call_smc_arch_workaround_1(void)
-{
-	arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
-}
-
-static void call_hvc_arch_workaround_1(void)
-{
-	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
-}
-
-static void qcom_link_stack_sanitization(void)
-{
-	u64 tmp;
-
-	asm volatile("mov	%0, x30		\n"
-		     ".rept	16		\n"
-		     "bl	. + 4		\n"
-		     ".endr			\n"
-		     "mov	x30, %0		\n"
-		     : "=&r" (tmp));
-}
-
-static bool __nospectre_v2;
-static int __init parse_nospectre_v2(char *str)
-{
-	__nospectre_v2 = true;
-	return 0;
-}
-early_param("nospectre_v2", parse_nospectre_v2);
-
-/*
- * -1: No workaround
- *  0: No workaround required
- *  1: Workaround installed
- */
-static int detect_harden_bp_fw(void)
-{
-	bp_hardening_cb_t cb;
-	struct arm_smccc_res res;
-	u32 midr = read_cpuid_id();
-
-	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-			     ARM_SMCCC_ARCH_WORKAROUND_1, &res);
-
-	switch ((int)res.a0) {
-	case 1:
-		/* Firmware says we're just fine */
-		return 0;
-	case 0:
-		break;
-	default:
-		return -1;
-	}
-
-	switch (arm_smccc_1_1_get_conduit()) {
-	case SMCCC_CONDUIT_HVC:
-		cb = call_hvc_arch_workaround_1;
-		break;
-
-	case SMCCC_CONDUIT_SMC:
-		cb = call_smc_arch_workaround_1;
-		break;
-
-	default:
-		return -1;
-	}
-
-	if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
-	    ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
-		cb = qcom_link_stack_sanitization;
-
-	install_bp_hardening_cb(cb);
-	return 1;
-}
-
 DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
 
 int ssbd_state __read_mostly = ARM64_SSBD_KERNEL;
@@ -508,83 +369,6 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
 	.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,			\
 	CAP_MIDR_RANGE_LIST(midr_list)
 
-/* Track overall mitigation state. We are only mitigated if all cores are ok */
-static bool __hardenbp_enab = true;
-static bool __spectrev2_safe = true;
-
-int get_spectre_v2_workaround_state(void)
-{
-	if (__spectrev2_safe)
-		return ARM64_BP_HARDEN_NOT_REQUIRED;
-
-	if (!__hardenbp_enab)
-		return ARM64_BP_HARDEN_UNKNOWN;
-
-	return ARM64_BP_HARDEN_WA_NEEDED;
-}
-
-/*
- * List of CPUs that do not need any Spectre-v2 mitigation at all.
- */
-static const struct midr_range spectre_v2_safe_list[] = {
-	MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
-	MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
-	MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
-	MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
-	MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
-	MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
-	MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
-	{ /* sentinel */ }
-};
-
-/*
- * Track overall bp hardening for all heterogeneous cores in the machine.
- * We are only considered "safe" if all booted cores are known safe.
- */
-static bool __maybe_unused
-check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
-{
-	int need_wa;
-
-	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
-
-	/* If the CPU has CSV2 set, we're safe */
-	if (cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1),
-						 ID_AA64PFR0_CSV2_SHIFT))
-		return false;
-
-	/* Alternatively, we have a list of unaffected CPUs */
-	if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list))
-		return false;
-
-	/* Fallback to firmware detection */
-	need_wa = detect_harden_bp_fw();
-	if (!need_wa)
-		return false;
-
-	__spectrev2_safe = false;
-
-	/* forced off */
-	if (__nospectre_v2 || cpu_mitigations_off()) {
-		pr_info_once("spectrev2 mitigation disabled by command line option\n");
-		__hardenbp_enab = false;
-		return false;
-	}
-
-	if (need_wa < 0) {
-		pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n");
-		__hardenbp_enab = false;
-	}
-
-	return (need_wa > 0);
-}
-
-static void
-cpu_enable_branch_predictor_hardening(const struct arm64_cpu_capabilities *cap)
-{
-	cap->matches(cap, SCOPE_LOCAL_CPU);
-}
-
 static const __maybe_unused struct midr_range tx2_family_cpus[] = {
 	MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
 	MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
@@ -876,11 +660,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	},
 #endif
 	{
-		.desc = "Branch predictor hardening",
+		.desc = "Spectre-v2",
 		.capability = ARM64_SPECTRE_V2,
 		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
-		.matches = check_branch_predictor,
-		.cpu_enable = cpu_enable_branch_predictor_hardening,
+		.matches = has_spectre_v2,
+		.cpu_enable = spectre_v2_enable_mitigation,
 	},
 #ifdef CONFIG_RANDOMIZE_BASE
 	{
@@ -949,20 +733,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	}
 };
 
-ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
-		char *buf)
-{
-	switch (get_spectre_v2_workaround_state()) {
-	case ARM64_BP_HARDEN_NOT_REQUIRED:
-		return sprintf(buf, "Not affected\n");
-        case ARM64_BP_HARDEN_WA_NEEDED:
-		return sprintf(buf, "Mitigation: Branch predictor hardening\n");
-        case ARM64_BP_HARDEN_UNKNOWN:
-	default:
-		return sprintf(buf, "Vulnerable\n");
-	}
-}
-
 ssize_t cpu_show_spec_store_bypass(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index c37bf468e4a4d7..4f1411b4530153 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -17,7 +17,33 @@
  * Authors: Will Deacon <will@kernel.org> and Marc Zyngier <maz@kernel.org>
  */
 
+#include <linux/arm-smccc.h>
+#include <linux/cpu.h>
 #include <linux/device.h>
+#include <linux/prctl.h>
+
+#include <asm/spectre.h>
+#include <asm/traps.h>
+
+/*
+ * We try to ensure that the mitigation state can never change as the result of
+ * onlining a late CPU.
+ */
+static void update_mitigation_state(enum mitigation_state *oldp,
+				    enum mitigation_state new)
+{
+	enum mitigation_state state;
+
+	do {
+		state = READ_ONCE(*oldp);
+		if (new <= state)
+			break;
+
+		/* Userspace almost certainly can't deal with this. */
+		if (WARN_ON(system_capabilities_finalized()))
+			break;
+	} while (cmpxchg_relaxed(oldp, state, new) != state);
+}
 
 /*
  * Spectre v1.
@@ -30,3 +56,265 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
 {
 	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
 }
+
+/*
+ * Spectre v2.
+ *
+ * This one sucks. A CPU is either:
+ *
+ * - Mitigated in hardware and advertised by ID_AA64PFR0_EL1.CSV2.
+ * - Mitigated in hardware and listed in our "safe list".
+ * - Mitigated in software by firmware.
+ * - Mitigated in software by a CPU-specific dance in the kernel.
+ * - Vulnerable.
+ *
+ * It's not unlikely for different CPUs in a big.LITTLE system to fall into
+ * different camps.
+ */
+static enum mitigation_state spectre_v2_state;
+
+static bool __read_mostly __nospectre_v2;
+static int __init parse_spectre_v2_param(char *str)
+{
+	__nospectre_v2 = true;
+	return 0;
+}
+early_param("nospectre_v2", parse_spectre_v2_param);
+
+static bool spectre_v2_mitigations_off(void)
+{
+	bool ret = __nospectre_v2 || cpu_mitigations_off();
+
+	if (ret)
+		pr_info_once("spectre-v2 mitigation disabled by command line option\n");
+
+	return ret;
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	switch (spectre_v2_state) {
+	case SPECTRE_UNAFFECTED:
+		return sprintf(buf, "Not affected\n");
+	case SPECTRE_MITIGATED:
+		return sprintf(buf, "Mitigation: Branch predictor hardening\n");
+	case SPECTRE_VULNERABLE:
+		fallthrough;
+	default:
+		return sprintf(buf, "Vulnerable\n");
+	}
+}
+
+static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void)
+{
+	u64 pfr0;
+	static const struct midr_range spectre_v2_safe_list[] = {
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+		MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+		MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+		{ /* sentinel */ }
+	};
+
+	/* If the CPU has CSV2 set, we're safe */
+	pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+	if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
+		return SPECTRE_UNAFFECTED;
+
+	/* Alternatively, we have a list of unaffected CPUs */
+	if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list))
+		return SPECTRE_UNAFFECTED;
+
+	return SPECTRE_VULNERABLE;
+}
+
+#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED	(1)
+
+static enum mitigation_state spectre_v2_get_cpu_fw_mitigation_state(void)
+{
+	int ret;
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+
+	ret = res.a0;
+	switch (ret) {
+	case SMCCC_RET_SUCCESS:
+		return SPECTRE_MITIGATED;
+	case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+		return SPECTRE_UNAFFECTED;
+	default:
+		fallthrough;
+	case SMCCC_RET_NOT_SUPPORTED:
+		return SPECTRE_VULNERABLE;
+	}
+}
+
+bool has_spectre_v2(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+	if (spectre_v2_get_cpu_hw_mitigation_state() == SPECTRE_UNAFFECTED)
+		return false;
+
+	if (spectre_v2_get_cpu_fw_mitigation_state() == SPECTRE_UNAFFECTED)
+		return false;
+
+	return true;
+}
+
+DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+enum mitigation_state arm64_get_spectre_v2_state(void)
+{
+	return spectre_v2_state;
+}
+
+#ifdef CONFIG_KVM
+#ifdef CONFIG_RANDOMIZE_BASE
+#include <asm/cacheflush.h>
+#include <asm/kvm_asm.h>
+
+atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
+
+static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
+				const char *hyp_vecs_end)
+{
+	void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K);
+	int i;
+
+	for (i = 0; i < SZ_2K; i += 0x80)
+		memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
+
+	__flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
+}
+
+static void install_bp_hardening_cb(bp_hardening_cb_t fn)
+{
+	static DEFINE_RAW_SPINLOCK(bp_lock);
+	int cpu, slot = -1;
+	const char *hyp_vecs_start = __smccc_workaround_1_smc;
+	const char *hyp_vecs_end = __smccc_workaround_1_smc +
+				   __SMCCC_WORKAROUND_1_SMC_SZ;
+
+	/*
+	 * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if
+	 * we're a guest. Skip the hyp-vectors work.
+	 */
+	if (!is_hyp_mode_available()) {
+		__this_cpu_write(bp_hardening_data.fn, fn);
+		return;
+	}
+
+	raw_spin_lock(&bp_lock);
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
+			slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
+			break;
+		}
+	}
+
+	if (slot == -1) {
+		slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+		BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
+		__copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
+	}
+
+	__this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
+	__this_cpu_write(bp_hardening_data.fn, fn);
+	raw_spin_unlock(&bp_lock);
+}
+#else
+static void install_bp_hardening_cb(bp_hardening_cb_t fn)
+{
+	__this_cpu_write(bp_hardening_data.fn, fn);
+}
+#endif	/* CONFIG_RANDOMIZE_BASE */
+#endif	/* CONFIG_KVM */
+
+static void call_smc_arch_workaround_1(void)
+{
+	arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+static void call_hvc_arch_workaround_1(void)
+{
+	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+static void qcom_link_stack_sanitisation(void)
+{
+	u64 tmp;
+
+	asm volatile("mov	%0, x30		\n"
+		     ".rept	16		\n"
+		     "bl	. + 4		\n"
+		     ".endr			\n"
+		     "mov	x30, %0		\n"
+		     : "=&r" (tmp));
+}
+
+static enum mitigation_state spectre_v2_enable_fw_mitigation(void)
+{
+	bp_hardening_cb_t cb;
+	enum mitigation_state state;
+
+	state = spectre_v2_get_cpu_fw_mitigation_state();
+	if (state != SPECTRE_MITIGATED)
+		return state;
+
+	if (spectre_v2_mitigations_off())
+		return SPECTRE_VULNERABLE;
+
+	switch (arm_smccc_1_1_get_conduit()) {
+	case SMCCC_CONDUIT_HVC:
+		cb = call_hvc_arch_workaround_1;
+		break;
+
+	case SMCCC_CONDUIT_SMC:
+		cb = call_smc_arch_workaround_1;
+		break;
+
+	default:
+		return SPECTRE_VULNERABLE;
+	}
+
+	install_bp_hardening_cb(cb);
+	return SPECTRE_MITIGATED;
+}
+
+static enum mitigation_state spectre_v2_enable_sw_mitigation(void)
+{
+	u32 midr;
+
+	if (spectre_v2_mitigations_off())
+		return SPECTRE_VULNERABLE;
+
+	midr = read_cpuid_id();
+	if (((midr & MIDR_CPU_MODEL_MASK) != MIDR_QCOM_FALKOR) &&
+	    ((midr & MIDR_CPU_MODEL_MASK) != MIDR_QCOM_FALKOR_V1))
+		return SPECTRE_VULNERABLE;
+
+	install_bp_hardening_cb(qcom_link_stack_sanitisation);
+	return SPECTRE_MITIGATED;
+}
+
+void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
+{
+	enum mitigation_state state;
+
+	WARN_ON(preemptible());
+
+	state = spectre_v2_get_cpu_hw_mitigation_state();
+	if (state == SPECTRE_VULNERABLE)
+		state = spectre_v2_enable_fw_mitigation();
+	if (state == SPECTRE_VULNERABLE)
+		state = spectre_v2_enable_sw_mitigation();
+
+	update_mitigation_state(&spectre_v2_state, state);
+}
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 550dfa3e53cddd..413d46b9bc07ca 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -24,13 +24,13 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 		feature = smccc_get_arg1(vcpu);
 		switch (feature) {
 		case ARM_SMCCC_ARCH_WORKAROUND_1:
-			switch (kvm_arm_harden_branch_predictor()) {
-			case KVM_BP_HARDEN_UNKNOWN:
+			switch (arm64_get_spectre_v2_state()) {
+			case SPECTRE_VULNERABLE:
 				break;
-			case KVM_BP_HARDEN_WA_NEEDED:
+			case SPECTRE_MITIGATED:
 				val = SMCCC_RET_SUCCESS;
 				break;
-			case KVM_BP_HARDEN_NOT_REQUIRED:
+			case SPECTRE_UNAFFECTED:
 				val = SMCCC_RET_NOT_REQUIRED;
 				break;
 			}
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 83415e96b589ff..fbdd6f3bea7f14 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -425,12 +425,12 @@ static int get_kernel_wa_level(u64 regid)
 {
 	switch (regid) {
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
-		switch (kvm_arm_harden_branch_predictor()) {
-		case KVM_BP_HARDEN_UNKNOWN:
+		switch (arm64_get_spectre_v2_state()) {
+		case SPECTRE_VULNERABLE:
 			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
-		case KVM_BP_HARDEN_WA_NEEDED:
+		case SPECTRE_MITIGATED:
 			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
-		case KVM_BP_HARDEN_NOT_REQUIRED:
+		case SPECTRE_UNAFFECTED:
 			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
 		}
 		return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;

From e1026237f90677fd5a454f63057a62f984c2188d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 16 Sep 2020 00:07:05 +0100
Subject: [PATCH 202/262] KVM: arm64: Set CSV2 for guests on hardware
 unaffected by Spectre-v2

If the system is not affected by Spectre-v2, then advertise to the KVM
guest that it is not affected, without the need for a safelist in the
guest.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/sys_regs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 077293b5115fa0..7b8a8f6169d06f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1131,6 +1131,9 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 		if (!vcpu_has_sve(vcpu))
 			val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
 		val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT);
+		if (!(val & (0xfUL << ID_AA64PFR0_CSV2_SHIFT)) &&
+		    arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED)
+			val |= (1UL << ID_AA64PFR0_CSV2_SHIFT);
 	} else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) {
 		val &= ~((0xfUL << ID_AA64ISAR1_APA_SHIFT) |
 			 (0xfUL << ID_AA64ISAR1_API_SHIFT) |

From a8de949893880a26458de03f5bc70075aba13d95 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Sep 2020 22:20:35 +0100
Subject: [PATCH 203/262] arm64: Group start_thread() functions together

The is_ttbrX_addr() functions have somehow ended up in the middle of
the start_thread() functions, so move them out of the way to keep the
code readable.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/processor.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 436ab1549ec672..d636c3eb9cf6ac 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -220,18 +220,6 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc,
 	regs->sp = sp;
 }
 
-static inline bool is_ttbr0_addr(unsigned long addr)
-{
-	/* entry assembly clears tags for TTBR0 addrs */
-	return addr < TASK_SIZE;
-}
-
-static inline bool is_ttbr1_addr(unsigned long addr)
-{
-	/* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
-	return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
-}
-
 #ifdef CONFIG_COMPAT
 static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
 				       unsigned long sp)
@@ -252,6 +240,18 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
 }
 #endif
 
+static inline bool is_ttbr0_addr(unsigned long addr)
+{
+	/* entry assembly clears tags for TTBR0 addrs */
+	return addr < TASK_SIZE;
+}
+
+static inline bool is_ttbr1_addr(unsigned long addr)
+{
+	/* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
+	return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
+}
+
 /* Forward declaration, a strange C thing */
 struct task_struct;
 

From 532d581583f25d4c297c721700f74b913f8cf37b Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Sep 2020 23:56:12 +0100
Subject: [PATCH 204/262] arm64: Treat SSBS as a non-strict system feature

If all CPUs discovered during boot have SSBS, then spectre-v4 will be
considered to be "mitigated". However, we still allow late CPUs without
SSBS to be onlined, albeit with a "SANITY CHECK" warning. This is
problematic for userspace because it means that the system can quietly
transition to "Vulnerable" at runtime.

Avoid this by treating SSBS as a non-strict system feature: if all of
the CPUs discovered during boot have SSBS, then late arriving secondaries
better have it as well.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 4bb45b1d4ae4af..033e6c9527055c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -227,7 +227,7 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0),
-	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI),
 	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI),
 				    FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0),
 	ARM64_FTR_END,
@@ -487,7 +487,7 @@ static const struct arm64_ftr_bits ftr_id_pfr1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_pfr2[] = {
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_CSV3_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
@@ -1977,7 +1977,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "Speculative Store Bypassing Safe (SSBS)",
 		.capability = ARM64_SSBS,
-		.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_cpuid_feature,
 		.sys_reg = SYS_ID_AA64PFR1_EL1,
 		.field_pos = ID_AA64PFR1_SSBS_SHIFT,

From 9b0955baa4208abd72840c13c5f56a0f133c7cb3 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 15 Sep 2020 23:00:31 +0100
Subject: [PATCH 205/262] arm64: Rename ARM64_SSBD to ARM64_SPECTRE_V4

In a similar manner to the renaming of ARM64_HARDEN_BRANCH_PREDICTOR
to ARM64_SPECTRE_V2, rename ARM64_SSBD to ARM64_SPECTRE_V4. This isn't
_entirely_ accurate, as we also need to take into account the interaction
with SSBS, but that will be taken care of in subsequent patches.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpucaps.h        | 2 +-
 arch/arm64/kernel/cpu_errata.c          | 2 +-
 arch/arm64/kvm/hyp/include/hyp/switch.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 348bfcf6c818db..c4ac9a13ad5fac 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -37,7 +37,7 @@
 #define ARM64_HAS_CACHE_IDC			27
 #define ARM64_HAS_CACHE_DIC			28
 #define ARM64_HW_DBM				29
-#define ARM64_SSBD				30
+#define ARM64_SPECTRE_V4			30
 #define ARM64_MISMATCHED_CACHE_TYPE		31
 #define ARM64_HAS_STAGE2_FWB			32
 #define ARM64_HAS_CRC32				33
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 4103391f51b5b5..6a48957d44fc9b 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -675,7 +675,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 #endif
 	{
 		.desc = "Speculative Store Bypass Disable",
-		.capability = ARM64_SSBD,
+		.capability = ARM64_SPECTRE_V4,
 		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
 		.matches = has_ssbd_mitigation,
 		.cpu_enable = cpu_enable_ssbd_mitigation,
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index b503f19c37c537..5dec5589625d11 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -481,7 +481,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 
 static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu)
 {
-	if (!cpus_have_final_cap(ARM64_SSBD))
+	if (!cpus_have_final_cap(ARM64_SPECTRE_V4))
 		return false;
 
 	return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);

From 9e78b659b4539ee20fd0c415cf8c231cea59e9c0 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 18 Sep 2020 11:45:57 +0100
Subject: [PATCH 206/262] arm64: Move SSBD prctl() handler alongside other
 spectre mitigation code

As part of the spectre consolidation effort to shift all of the ghosts
into their own proton pack, move all of the horrible SSBD prctl() code
out of its own 'ssbd.c' file.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/Makefile      |   2 +-
 arch/arm64/kernel/proton-pack.c | 118 +++++++++++++++++++++++++++++
 arch/arm64/kernel/ssbd.c        | 129 --------------------------------
 3 files changed, 119 insertions(+), 130 deletions(-)
 delete mode 100644 arch/arm64/kernel/ssbd.c

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 15b0fcbb688368..bd12b9a2ab4a91 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -19,7 +19,7 @@ obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   return_address.o cpuinfo.o cpu_errata.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
-			   ssbd.o syscall.o proton-pack.o
+			   syscall.o proton-pack.o
 
 targets			+= efi-entry.o
 
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 4f1411b4530153..b373e4782ccd4a 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -20,6 +20,7 @@
 #include <linux/arm-smccc.h>
 #include <linux/cpu.h>
 #include <linux/device.h>
+#include <linux/nospec.h>
 #include <linux/prctl.h>
 
 #include <asm/spectre.h>
@@ -318,3 +319,120 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
 
 	update_mitigation_state(&spectre_v2_state, state);
 }
+
+/* Spectre v4 prctl */
+static void ssbd_ssbs_enable(struct task_struct *task)
+{
+	u64 val = is_compat_thread(task_thread_info(task)) ?
+		  PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
+
+	task_pt_regs(task)->pstate |= val;
+}
+
+static void ssbd_ssbs_disable(struct task_struct *task)
+{
+	u64 val = is_compat_thread(task_thread_info(task)) ?
+		  PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
+
+	task_pt_regs(task)->pstate &= ~val;
+}
+
+/*
+ * prctl interface for SSBD
+ */
+static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+	int state = arm64_get_ssbd_state();
+
+	/* Unsupported */
+	if (state == ARM64_SSBD_UNKNOWN)
+		return -ENODEV;
+
+	/* Treat the unaffected/mitigated state separately */
+	if (state == ARM64_SSBD_MITIGATED) {
+		switch (ctrl) {
+		case PR_SPEC_ENABLE:
+			return -EPERM;
+		case PR_SPEC_DISABLE:
+		case PR_SPEC_FORCE_DISABLE:
+			return 0;
+		}
+	}
+
+	/*
+	 * Things are a bit backward here: the arm64 internal API
+	 * *enables the mitigation* when the userspace API *disables
+	 * speculation*. So much fun.
+	 */
+	switch (ctrl) {
+	case PR_SPEC_ENABLE:
+		/* If speculation is force disabled, enable is not allowed */
+		if (state == ARM64_SSBD_FORCE_ENABLE ||
+		    task_spec_ssb_force_disable(task))
+			return -EPERM;
+		task_clear_spec_ssb_disable(task);
+		clear_tsk_thread_flag(task, TIF_SSBD);
+		ssbd_ssbs_enable(task);
+		break;
+	case PR_SPEC_DISABLE:
+		if (state == ARM64_SSBD_FORCE_DISABLE)
+			return -EPERM;
+		task_set_spec_ssb_disable(task);
+		set_tsk_thread_flag(task, TIF_SSBD);
+		ssbd_ssbs_disable(task);
+		break;
+	case PR_SPEC_FORCE_DISABLE:
+		if (state == ARM64_SSBD_FORCE_DISABLE)
+			return -EPERM;
+		task_set_spec_ssb_disable(task);
+		task_set_spec_ssb_force_disable(task);
+		set_tsk_thread_flag(task, TIF_SSBD);
+		ssbd_ssbs_disable(task);
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+			     unsigned long ctrl)
+{
+	switch (which) {
+	case PR_SPEC_STORE_BYPASS:
+		return ssbd_prctl_set(task, ctrl);
+	default:
+		return -ENODEV;
+	}
+}
+
+static int ssbd_prctl_get(struct task_struct *task)
+{
+	switch (arm64_get_ssbd_state()) {
+	case ARM64_SSBD_UNKNOWN:
+		return -ENODEV;
+	case ARM64_SSBD_FORCE_ENABLE:
+		return PR_SPEC_DISABLE;
+	case ARM64_SSBD_KERNEL:
+		if (task_spec_ssb_force_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+		if (task_spec_ssb_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+	case ARM64_SSBD_FORCE_DISABLE:
+		return PR_SPEC_ENABLE;
+	default:
+		return PR_SPEC_NOT_AFFECTED;
+	}
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+	switch (which) {
+	case PR_SPEC_STORE_BYPASS:
+		return ssbd_prctl_get(task);
+	default:
+		return -ENODEV;
+	}
+}
diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c
deleted file mode 100644
index b26955f567501c..00000000000000
--- a/arch/arm64/kernel/ssbd.c
+++ /dev/null
@@ -1,129 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018 ARM Ltd, All Rights Reserved.
- */
-
-#include <linux/compat.h>
-#include <linux/errno.h>
-#include <linux/prctl.h>
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/thread_info.h>
-
-#include <asm/cpufeature.h>
-
-static void ssbd_ssbs_enable(struct task_struct *task)
-{
-	u64 val = is_compat_thread(task_thread_info(task)) ?
-		  PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
-
-	task_pt_regs(task)->pstate |= val;
-}
-
-static void ssbd_ssbs_disable(struct task_struct *task)
-{
-	u64 val = is_compat_thread(task_thread_info(task)) ?
-		  PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
-
-	task_pt_regs(task)->pstate &= ~val;
-}
-
-/*
- * prctl interface for SSBD
- */
-static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
-{
-	int state = arm64_get_ssbd_state();
-
-	/* Unsupported */
-	if (state == ARM64_SSBD_UNKNOWN)
-		return -ENODEV;
-
-	/* Treat the unaffected/mitigated state separately */
-	if (state == ARM64_SSBD_MITIGATED) {
-		switch (ctrl) {
-		case PR_SPEC_ENABLE:
-			return -EPERM;
-		case PR_SPEC_DISABLE:
-		case PR_SPEC_FORCE_DISABLE:
-			return 0;
-		}
-	}
-
-	/*
-	 * Things are a bit backward here: the arm64 internal API
-	 * *enables the mitigation* when the userspace API *disables
-	 * speculation*. So much fun.
-	 */
-	switch (ctrl) {
-	case PR_SPEC_ENABLE:
-		/* If speculation is force disabled, enable is not allowed */
-		if (state == ARM64_SSBD_FORCE_ENABLE ||
-		    task_spec_ssb_force_disable(task))
-			return -EPERM;
-		task_clear_spec_ssb_disable(task);
-		clear_tsk_thread_flag(task, TIF_SSBD);
-		ssbd_ssbs_enable(task);
-		break;
-	case PR_SPEC_DISABLE:
-		if (state == ARM64_SSBD_FORCE_DISABLE)
-			return -EPERM;
-		task_set_spec_ssb_disable(task);
-		set_tsk_thread_flag(task, TIF_SSBD);
-		ssbd_ssbs_disable(task);
-		break;
-	case PR_SPEC_FORCE_DISABLE:
-		if (state == ARM64_SSBD_FORCE_DISABLE)
-			return -EPERM;
-		task_set_spec_ssb_disable(task);
-		task_set_spec_ssb_force_disable(task);
-		set_tsk_thread_flag(task, TIF_SSBD);
-		ssbd_ssbs_disable(task);
-		break;
-	default:
-		return -ERANGE;
-	}
-
-	return 0;
-}
-
-int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
-			     unsigned long ctrl)
-{
-	switch (which) {
-	case PR_SPEC_STORE_BYPASS:
-		return ssbd_prctl_set(task, ctrl);
-	default:
-		return -ENODEV;
-	}
-}
-
-static int ssbd_prctl_get(struct task_struct *task)
-{
-	switch (arm64_get_ssbd_state()) {
-	case ARM64_SSBD_UNKNOWN:
-		return -ENODEV;
-	case ARM64_SSBD_FORCE_ENABLE:
-		return PR_SPEC_DISABLE;
-	case ARM64_SSBD_KERNEL:
-		if (task_spec_ssb_force_disable(task))
-			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
-		if (task_spec_ssb_disable(task))
-			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
-		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
-	case ARM64_SSBD_FORCE_DISABLE:
-		return PR_SPEC_ENABLE;
-	default:
-		return PR_SPEC_NOT_AFFECTED;
-	}
-}
-
-int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
-{
-	switch (which) {
-	case PR_SPEC_STORE_BYPASS:
-		return ssbd_prctl_get(task);
-	default:
-		return -ENODEV;
-	}
-}

From c28762070ca651fe7a981b8f31d972c9b7d2c386 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 18 Sep 2020 11:54:33 +0100
Subject: [PATCH 207/262] arm64: Rewrite Spectre-v4 mitigation code

Rewrite the Spectre-v4 mitigation handling code to follow the same
approach as that taken by Spectre-v2.

For now, report to KVM that the system is vulnerable (by forcing
'ssbd_state' to ARM64_SSBD_UNKNOWN), as this will be cleared up in
subsequent steps.

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/processor.h |  19 +-
 arch/arm64/include/asm/spectre.h   |   5 +
 arch/arm64/kernel/cpu_errata.c     | 217 +-------------
 arch/arm64/kernel/cpufeature.c     |  41 ---
 arch/arm64/kernel/entry.S          |   6 +-
 arch/arm64/kernel/hibernate.c      |   6 +-
 arch/arm64/kernel/process.c        |  17 +-
 arch/arm64/kernel/proton-pack.c    | 439 +++++++++++++++++++++++++----
 arch/arm64/kernel/suspend.c        |   3 +-
 9 files changed, 401 insertions(+), 352 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index d636c3eb9cf6ac..7d90ea2e20639b 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -198,25 +198,12 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
 		regs->pmr_save = GIC_PRIO_IRQON;
 }
 
-static inline void set_ssbs_bit(struct pt_regs *regs)
-{
-	regs->pstate |= PSR_SSBS_BIT;
-}
-
-static inline void set_compat_ssbs_bit(struct pt_regs *regs)
-{
-	regs->pstate |= PSR_AA32_SSBS_BIT;
-}
-
 static inline void start_thread(struct pt_regs *regs, unsigned long pc,
 				unsigned long sp)
 {
 	start_thread_common(regs, pc);
 	regs->pstate = PSR_MODE_EL0t;
-
-	if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
-		set_ssbs_bit(regs);
-
+	spectre_v4_enable_task_mitigation(current);
 	regs->sp = sp;
 }
 
@@ -233,9 +220,7 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
 	regs->pstate |= PSR_AA32_E_BIT;
 #endif
 
-	if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
-		set_compat_ssbs_bit(regs);
-
+	spectre_v4_enable_task_mitigation(current);
 	regs->compat_sp = sp;
 }
 #endif
diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h
index b776abe28dffff..fcdfbce302bdfb 100644
--- a/arch/arm64/include/asm/spectre.h
+++ b/arch/arm64/include/asm/spectre.h
@@ -24,4 +24,9 @@ enum mitigation_state arm64_get_spectre_v2_state(void);
 bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope);
 void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
 
+enum mitigation_state arm64_get_spectre_v4_state(void);
+bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope);
+void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused);
+void spectre_v4_enable_task_mitigation(struct task_struct *tsk);
+
 #endif	/* __ASM_SPECTRE_H */
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 6a48957d44fc9b..7fc54c3d428509 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -106,62 +106,7 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
 		sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
 }
 
-DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
-
-int ssbd_state __read_mostly = ARM64_SSBD_KERNEL;
-static bool __ssb_safe = true;
-
-static const struct ssbd_options {
-	const char	*str;
-	int		state;
-} ssbd_options[] = {
-	{ "force-on",	ARM64_SSBD_FORCE_ENABLE, },
-	{ "force-off",	ARM64_SSBD_FORCE_DISABLE, },
-	{ "kernel",	ARM64_SSBD_KERNEL, },
-};
-
-static int __init ssbd_cfg(char *buf)
-{
-	int i;
-
-	if (!buf || !buf[0])
-		return -EINVAL;
-
-	for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) {
-		int len = strlen(ssbd_options[i].str);
-
-		if (strncmp(buf, ssbd_options[i].str, len))
-			continue;
-
-		ssbd_state = ssbd_options[i].state;
-		return 0;
-	}
-
-	return -EINVAL;
-}
-early_param("ssbd", ssbd_cfg);
-
-void __init arm64_update_smccc_conduit(struct alt_instr *alt,
-				       __le32 *origptr, __le32 *updptr,
-				       int nr_inst)
-{
-	u32 insn;
-
-	BUG_ON(nr_inst != 1);
-
-	switch (arm_smccc_1_1_get_conduit()) {
-	case SMCCC_CONDUIT_HVC:
-		insn = aarch64_insn_get_hvc_value();
-		break;
-	case SMCCC_CONDUIT_SMC:
-		insn = aarch64_insn_get_smc_value();
-		break;
-	default:
-		return;
-	}
-
-	*updptr = cpu_to_le32(insn);
-}
+int ssbd_state __read_mostly = ARM64_SSBD_UNKNOWN;
 
 void __init arm64_enable_wa2_handling(struct alt_instr *alt,
 				      __le32 *origptr, __le32 *updptr,
@@ -177,144 +122,6 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,
 		*updptr = cpu_to_le32(aarch64_insn_gen_nop());
 }
 
-void arm64_set_ssbd_mitigation(bool state)
-{
-	int conduit;
-
-	if (this_cpu_has_cap(ARM64_SSBS)) {
-		if (state)
-			asm volatile(SET_PSTATE_SSBS(0));
-		else
-			asm volatile(SET_PSTATE_SSBS(1));
-		return;
-	}
-
-	conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state,
-				       NULL);
-
-	WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE);
-}
-
-static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
-				    int scope)
-{
-	struct arm_smccc_res res;
-	bool required = true;
-	s32 val;
-	bool this_cpu_safe = false;
-	int conduit;
-
-	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
-
-	if (cpu_mitigations_off())
-		ssbd_state = ARM64_SSBD_FORCE_DISABLE;
-
-	/* delay setting __ssb_safe until we get a firmware response */
-	if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list))
-		this_cpu_safe = true;
-
-	if (this_cpu_has_cap(ARM64_SSBS)) {
-		if (!this_cpu_safe)
-			__ssb_safe = false;
-		required = false;
-		goto out_printmsg;
-	}
-
-	conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
-				       ARM_SMCCC_ARCH_WORKAROUND_2, &res);
-
-	if (conduit == SMCCC_CONDUIT_NONE) {
-		ssbd_state = ARM64_SSBD_UNKNOWN;
-		if (!this_cpu_safe)
-			__ssb_safe = false;
-		return false;
-	}
-
-	val = (s32)res.a0;
-
-	switch (val) {
-	case SMCCC_RET_NOT_SUPPORTED:
-		ssbd_state = ARM64_SSBD_UNKNOWN;
-		if (!this_cpu_safe)
-			__ssb_safe = false;
-		return false;
-
-	/* machines with mixed mitigation requirements must not return this */
-	case SMCCC_RET_NOT_REQUIRED:
-		pr_info_once("%s mitigation not required\n", entry->desc);
-		ssbd_state = ARM64_SSBD_MITIGATED;
-		return false;
-
-	case SMCCC_RET_SUCCESS:
-		__ssb_safe = false;
-		required = true;
-		break;
-
-	case 1:	/* Mitigation not required on this CPU */
-		required = false;
-		break;
-
-	default:
-		WARN_ON(1);
-		if (!this_cpu_safe)
-			__ssb_safe = false;
-		return false;
-	}
-
-	switch (ssbd_state) {
-	case ARM64_SSBD_FORCE_DISABLE:
-		arm64_set_ssbd_mitigation(false);
-		required = false;
-		break;
-
-	case ARM64_SSBD_KERNEL:
-		if (required) {
-			__this_cpu_write(arm64_ssbd_callback_required, 1);
-			arm64_set_ssbd_mitigation(true);
-		}
-		break;
-
-	case ARM64_SSBD_FORCE_ENABLE:
-		arm64_set_ssbd_mitigation(true);
-		required = true;
-		break;
-
-	default:
-		WARN_ON(1);
-		break;
-	}
-
-out_printmsg:
-	switch (ssbd_state) {
-	case ARM64_SSBD_FORCE_DISABLE:
-		pr_info_once("%s disabled from command-line\n", entry->desc);
-		break;
-
-	case ARM64_SSBD_FORCE_ENABLE:
-		pr_info_once("%s forced from command-line\n", entry->desc);
-		break;
-	}
-
-	return required;
-}
-
-static void cpu_enable_ssbd_mitigation(const struct arm64_cpu_capabilities *cap)
-{
-	if (ssbd_state != ARM64_SSBD_FORCE_DISABLE)
-		cap->matches(cap, SCOPE_LOCAL_CPU);
-}
-
-/* known invulnerable cores */
-static const struct midr_range arm64_ssb_cpus[] = {
-	MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
-	MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
-	MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
-	MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
-	MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
-	MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
-	{},
-};
-
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
 
@@ -674,12 +481,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	},
 #endif
 	{
-		.desc = "Speculative Store Bypass Disable",
+		.desc = "Spectre-v4",
 		.capability = ARM64_SPECTRE_V4,
 		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
-		.matches = has_ssbd_mitigation,
-		.cpu_enable = cpu_enable_ssbd_mitigation,
-		.midr_range_list = arm64_ssb_cpus,
+		.matches = has_spectre_v4,
+		.cpu_enable = spectre_v4_enable_mitigation,
 	},
 #ifdef CONFIG_ARM64_ERRATUM_1418040
 	{
@@ -732,18 +538,3 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	{
 	}
 };
-
-ssize_t cpu_show_spec_store_bypass(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	if (__ssb_safe)
-		return sprintf(buf, "Not affected\n");
-
-	switch (ssbd_state) {
-	case ARM64_SSBD_KERNEL:
-	case ARM64_SSBD_FORCE_ENABLE:
-		return sprintf(buf, "Mitigation: Speculative Store Bypass disabled via prctl\n");
-	}
-
-	return sprintf(buf, "Vulnerable\n");
-}
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 033e6c9527055c..a4debb63ebfbf3 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1583,46 +1583,6 @@ static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
 	WARN_ON(val & (7 << 27 | 7 << 21));
 }
 
-static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr)
-{
-	if (user_mode(regs))
-		return 1;
-
-	if (instr & BIT(PSTATE_Imm_shift))
-		regs->pstate |= PSR_SSBS_BIT;
-	else
-		regs->pstate &= ~PSR_SSBS_BIT;
-
-	arm64_skip_faulting_instruction(regs, 4);
-	return 0;
-}
-
-static struct undef_hook ssbs_emulation_hook = {
-	.instr_mask	= ~(1U << PSTATE_Imm_shift),
-	.instr_val	= 0xd500401f | PSTATE_SSBS,
-	.fn		= ssbs_emulation_handler,
-};
-
-static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused)
-{
-	static bool undef_hook_registered = false;
-	static DEFINE_RAW_SPINLOCK(hook_lock);
-
-	raw_spin_lock(&hook_lock);
-	if (!undef_hook_registered) {
-		register_undef_hook(&ssbs_emulation_hook);
-		undef_hook_registered = true;
-	}
-	raw_spin_unlock(&hook_lock);
-
-	if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
-		sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
-		arm64_set_ssbd_mitigation(false);
-	} else {
-		arm64_set_ssbd_mitigation(true);
-	}
-}
-
 #ifdef CONFIG_ARM64_PAN
 static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
 {
@@ -1983,7 +1943,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_pos = ID_AA64PFR1_SSBS_SHIFT,
 		.sign = FTR_UNSIGNED,
 		.min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY,
-		.cpu_enable = cpu_enable_ssbs,
 	},
 #ifdef CONFIG_ARM64_CNP
 	{
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 81b709349d7b46..aeb337029d5674 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -132,8 +132,8 @@ alternative_else_nop_endif
 	 * them if required.
 	 */
 	.macro	apply_ssbd, state, tmp1, tmp2
-alternative_cb	arm64_enable_wa2_handling
-	b	.L__asm_ssbd_skip\@
+alternative_cb	spectre_v4_patch_fw_mitigation_enable
+	b	.L__asm_ssbd_skip\@		// Patched to NOP
 alternative_cb_end
 	ldr_this_cpu	\tmp2, arm64_ssbd_callback_required, \tmp1
 	cbz	\tmp2,	.L__asm_ssbd_skip\@
@@ -141,7 +141,7 @@ alternative_cb_end
 	tbnz	\tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@
 	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_2
 	mov	w1, #\state
-alternative_cb	arm64_update_smccc_conduit
+alternative_cb	spectre_v4_patch_fw_mitigation_conduit
 	nop					// Patched to SMC/HVC #0
 alternative_cb_end
 .L__asm_ssbd_skip\@:
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 68e14152d6e9b0..c7b00120dc3e4b 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -332,11 +332,7 @@ int swsusp_arch_suspend(void)
 		 * mitigation off behind our back, let's set the state
 		 * to what we expect it to be.
 		 */
-		switch (arm64_get_ssbd_state()) {
-		case ARM64_SSBD_FORCE_ENABLE:
-		case ARM64_SSBD_KERNEL:
-			arm64_set_ssbd_mitigation(true);
-		}
+		spectre_v4_enable_mitigation(NULL);
 	}
 
 	local_daif_restore(flags);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index f1804496b93508..9dbd35b95253a7 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -421,8 +421,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 		    cpus_have_const_cap(ARM64_HAS_UAO))
 			childregs->pstate |= PSR_UAO_BIT;
 
-		if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
-			set_ssbs_bit(childregs);
+		spectre_v4_enable_task_mitigation(p);
 
 		if (system_uses_irq_prio_masking())
 			childregs->pmr_save = GIC_PRIO_IRQON;
@@ -472,8 +471,6 @@ void uao_thread_switch(struct task_struct *next)
  */
 static void ssbs_thread_switch(struct task_struct *next)
 {
-	struct pt_regs *regs = task_pt_regs(next);
-
 	/*
 	 * Nothing to do for kernel threads, but 'regs' may be junk
 	 * (e.g. idle task) so check the flags and bail early.
@@ -485,18 +482,10 @@ static void ssbs_thread_switch(struct task_struct *next)
 	 * If all CPUs implement the SSBS extension, then we just need to
 	 * context-switch the PSTATE field.
 	 */
-	if (cpu_have_feature(cpu_feature(SSBS)))
-		return;
-
-	/* If the mitigation is enabled, then we leave SSBS clear. */
-	if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
-	    test_tsk_thread_flag(next, TIF_SSBD))
+	if (cpus_have_const_cap(ARM64_SSBS))
 		return;
 
-	if (compat_user_mode(regs))
-		set_compat_ssbs_bit(regs);
-	else if (user_mode(regs))
-		set_ssbs_bit(regs);
+	spectre_v4_enable_task_mitigation(next);
 }
 
 /*
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index b373e4782ccd4a..b1ea935fd9481f 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -320,79 +320,394 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
 	update_mitigation_state(&spectre_v2_state, state);
 }
 
-/* Spectre v4 prctl */
-static void ssbd_ssbs_enable(struct task_struct *task)
+/*
+ * Spectre v4.
+ *
+ * If you thought Spectre v2 was nasty, wait until you see this mess. A CPU is
+ * either:
+ *
+ * - Mitigated in hardware and listed in our "safe list".
+ * - Mitigated in hardware via PSTATE.SSBS.
+ * - Mitigated in software by firmware (sometimes referred to as SSBD).
+ *
+ * Wait, that doesn't sound so bad, does it? Keep reading...
+ *
+ * A major source of headaches is that the software mitigation is enabled both
+ * on a per-task basis, but can also be forced on for the kernel, necessitating
+ * both context-switch *and* entry/exit hooks. To make it even worse, some CPUs
+ * allow EL0 to toggle SSBS directly, which can end up with the prctl() state
+ * being stale when re-entering the kernel. The usual big.LITTLE caveats apply,
+ * so you can have systems that have both firmware and SSBS mitigations. This
+ * means we actually have to reject late onlining of CPUs with mitigations if
+ * all of the currently onlined CPUs are safelisted, as the mitigation tends to
+ * be opt-in for userspace. Yes, really, the cure is worse than the disease.
+ *
+ * The only good part is that if the firmware mitigation is present, then it is
+ * present for all CPUs, meaning we don't have to worry about late onlining of a
+ * vulnerable CPU if one of the boot CPUs is using the firmware mitigation.
+ *
+ * Give me a VAX-11/780 any day of the week...
+ */
+static enum mitigation_state spectre_v4_state;
+
+/* This is the per-cpu state tracking whether we need to talk to firmware */
+DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
+
+enum spectre_v4_policy {
+	SPECTRE_V4_POLICY_MITIGATION_DYNAMIC,
+	SPECTRE_V4_POLICY_MITIGATION_ENABLED,
+	SPECTRE_V4_POLICY_MITIGATION_DISABLED,
+};
+
+static enum spectre_v4_policy __read_mostly __spectre_v4_policy;
+
+static const struct spectre_v4_param {
+	const char		*str;
+	enum spectre_v4_policy	policy;
+} spectre_v4_params[] = {
+	{ "force-on",	SPECTRE_V4_POLICY_MITIGATION_ENABLED, },
+	{ "force-off",	SPECTRE_V4_POLICY_MITIGATION_DISABLED, },
+	{ "kernel",	SPECTRE_V4_POLICY_MITIGATION_DYNAMIC, },
+};
+static int __init parse_spectre_v4_param(char *str)
 {
-	u64 val = is_compat_thread(task_thread_info(task)) ?
-		  PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
+	int i;
 
-	task_pt_regs(task)->pstate |= val;
-}
+	if (!str || !str[0])
+		return -EINVAL;
 
-static void ssbd_ssbs_disable(struct task_struct *task)
-{
-	u64 val = is_compat_thread(task_thread_info(task)) ?
-		  PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
+	for (i = 0; i < ARRAY_SIZE(spectre_v4_params); i++) {
+		const struct spectre_v4_param *param = &spectre_v4_params[i];
+
+		if (strncmp(str, param->str, strlen(param->str)))
+			continue;
 
-	task_pt_regs(task)->pstate &= ~val;
+		__spectre_v4_policy = param->policy;
+		return 0;
+	}
+
+	return -EINVAL;
 }
+early_param("ssbd", parse_spectre_v4_param);
 
 /*
- * prctl interface for SSBD
+ * Because this was all written in a rush by people working in different silos,
+ * we've ended up with multiple command line options to control the same thing.
+ * Wrap these up in some helpers, which prefer disabling the mitigation if faced
+ * with contradictory parameters. The mitigation is always either "off",
+ * "dynamic" or "on".
  */
-static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
+static bool spectre_v4_mitigations_off(void)
 {
-	int state = arm64_get_ssbd_state();
+	bool ret = cpu_mitigations_off() ||
+		   __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED;
 
-	/* Unsupported */
-	if (state == ARM64_SSBD_UNKNOWN)
-		return -ENODEV;
+	if (ret)
+		pr_info_once("spectre-v4 mitigation disabled by command-line option\n");
 
-	/* Treat the unaffected/mitigated state separately */
-	if (state == ARM64_SSBD_MITIGATED) {
-		switch (ctrl) {
-		case PR_SPEC_ENABLE:
-			return -EPERM;
-		case PR_SPEC_DISABLE:
-		case PR_SPEC_FORCE_DISABLE:
-			return 0;
-		}
+	return ret;
+}
+
+/* Do we need to toggle the mitigation state on entry to/exit from the kernel? */
+static bool spectre_v4_mitigations_dynamic(void)
+{
+	return !spectre_v4_mitigations_off() &&
+	       __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DYNAMIC;
+}
+
+static bool spectre_v4_mitigations_on(void)
+{
+	return !spectre_v4_mitigations_off() &&
+	       __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_ENABLED;
+}
+
+ssize_t cpu_show_spec_store_bypass(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	switch (spectre_v4_state) {
+	case SPECTRE_UNAFFECTED:
+		return sprintf(buf, "Not affected\n");
+	case SPECTRE_MITIGATED:
+		return sprintf(buf, "Mitigation: Speculative Store Bypass disabled via prctl\n");
+	case SPECTRE_VULNERABLE:
+		fallthrough;
+	default:
+		return sprintf(buf, "Vulnerable\n");
 	}
+}
+
+enum mitigation_state arm64_get_spectre_v4_state(void)
+{
+	return spectre_v4_state;
+}
+
+static enum mitigation_state spectre_v4_get_cpu_hw_mitigation_state(void)
+{
+	static const struct midr_range spectre_v4_safe_list[] = {
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+		MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+		MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER),
+		MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER),
+		{ /* sentinel */ },
+	};
+
+	if (is_midr_in_range_list(read_cpuid_id(), spectre_v4_safe_list))
+		return SPECTRE_UNAFFECTED;
+
+	/* CPU features are detected first */
+	if (this_cpu_has_cap(ARM64_SSBS))
+		return SPECTRE_MITIGATED;
+
+	return SPECTRE_VULNERABLE;
+}
+
+static enum mitigation_state spectre_v4_get_cpu_fw_mitigation_state(void)
+{
+	int ret;
+	struct arm_smccc_res res;
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+			     ARM_SMCCC_ARCH_WORKAROUND_2, &res);
+
+	ret = res.a0;
+	switch (ret) {
+	case SMCCC_RET_SUCCESS:
+		return SPECTRE_MITIGATED;
+	case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED:
+		fallthrough;
+	case SMCCC_RET_NOT_REQUIRED:
+		return SPECTRE_UNAFFECTED;
+	default:
+		fallthrough;
+	case SMCCC_RET_NOT_SUPPORTED:
+		return SPECTRE_VULNERABLE;
+	}
+}
+
+bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope)
+{
+	enum mitigation_state state;
+
+	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+	state = spectre_v4_get_cpu_hw_mitigation_state();
+	if (state == SPECTRE_VULNERABLE)
+		state = spectre_v4_get_cpu_fw_mitigation_state();
+
+	return state != SPECTRE_UNAFFECTED;
+}
+
+static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr)
+{
+	if (user_mode(regs))
+		return 1;
+
+	if (instr & BIT(PSTATE_Imm_shift))
+		regs->pstate |= PSR_SSBS_BIT;
+	else
+		regs->pstate &= ~PSR_SSBS_BIT;
+
+	arm64_skip_faulting_instruction(regs, 4);
+	return 0;
+}
+
+static struct undef_hook ssbs_emulation_hook = {
+	.instr_mask	= ~(1U << PSTATE_Imm_shift),
+	.instr_val	= 0xd500401f | PSTATE_SSBS,
+	.fn		= ssbs_emulation_handler,
+};
+
+static enum mitigation_state spectre_v4_enable_hw_mitigation(void)
+{
+	static bool undef_hook_registered = false;
+	static DEFINE_RAW_SPINLOCK(hook_lock);
+	enum mitigation_state state;
 
 	/*
-	 * Things are a bit backward here: the arm64 internal API
-	 * *enables the mitigation* when the userspace API *disables
-	 * speculation*. So much fun.
+	 * If the system is mitigated but this CPU doesn't have SSBS, then
+	 * we must be on the safelist and there's nothing more to do.
 	 */
+	state = spectre_v4_get_cpu_hw_mitigation_state();
+	if (state != SPECTRE_MITIGATED || !this_cpu_has_cap(ARM64_SSBS))
+		return state;
+
+	raw_spin_lock(&hook_lock);
+	if (!undef_hook_registered) {
+		register_undef_hook(&ssbs_emulation_hook);
+		undef_hook_registered = true;
+	}
+	raw_spin_unlock(&hook_lock);
+
+	if (spectre_v4_mitigations_off()) {
+		sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS);
+		asm volatile(SET_PSTATE_SSBS(1));
+		return SPECTRE_VULNERABLE;
+	}
+
+	/* SCTLR_EL1.DSSBS was initialised to 0 during boot */
+	asm volatile(SET_PSTATE_SSBS(0));
+	return SPECTRE_MITIGATED;
+}
+
+/*
+ * Patch a branch over the Spectre-v4 mitigation code with a NOP so that
+ * we fallthrough and check whether firmware needs to be called on this CPU.
+ */
+void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt,
+						  __le32 *origptr,
+						  __le32 *updptr, int nr_inst)
+{
+	BUG_ON(nr_inst != 1); /* Branch -> NOP */
+
+	if (spectre_v4_mitigations_off())
+		return;
+
+	if (cpus_have_final_cap(ARM64_SSBS))
+		return;
+
+	if (spectre_v4_mitigations_dynamic())
+		*updptr = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
+/*
+ * Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction
+ * to call into firmware to adjust the mitigation state.
+ */
+void __init spectre_v4_patch_fw_mitigation_conduit(struct alt_instr *alt,
+						   __le32 *origptr,
+						   __le32 *updptr, int nr_inst)
+{
+	u32 insn;
+
+	BUG_ON(nr_inst != 1); /* NOP -> HVC/SMC */
+
+	switch (arm_smccc_1_1_get_conduit()) {
+	case SMCCC_CONDUIT_HVC:
+		insn = aarch64_insn_get_hvc_value();
+		break;
+	case SMCCC_CONDUIT_SMC:
+		insn = aarch64_insn_get_smc_value();
+		break;
+	default:
+		return;
+	}
+
+	*updptr = cpu_to_le32(insn);
+}
+
+static enum mitigation_state spectre_v4_enable_fw_mitigation(void)
+{
+	enum mitigation_state state;
+
+	state = spectre_v4_get_cpu_fw_mitigation_state();
+	if (state != SPECTRE_MITIGATED)
+		return state;
+
+	if (spectre_v4_mitigations_off()) {
+		arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, false, NULL);
+		return SPECTRE_VULNERABLE;
+	}
+
+	arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, true, NULL);
+
+	if (spectre_v4_mitigations_dynamic())
+		__this_cpu_write(arm64_ssbd_callback_required, 1);
+
+	return SPECTRE_MITIGATED;
+}
+
+void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused)
+{
+	enum mitigation_state state;
+
+	WARN_ON(preemptible());
+
+	state = spectre_v4_enable_hw_mitigation();
+	if (state == SPECTRE_VULNERABLE)
+		state = spectre_v4_enable_fw_mitigation();
+
+	update_mitigation_state(&spectre_v4_state, state);
+}
+
+static void __update_pstate_ssbs(struct pt_regs *regs, bool state)
+{
+	u64 bit = compat_user_mode(regs) ? PSR_AA32_SSBS_BIT : PSR_SSBS_BIT;
+
+	if (state)
+		regs->pstate |= bit;
+	else
+		regs->pstate &= ~bit;
+}
+
+void spectre_v4_enable_task_mitigation(struct task_struct *tsk)
+{
+	struct pt_regs *regs = task_pt_regs(tsk);
+	bool ssbs = false, kthread = tsk->flags & PF_KTHREAD;
+
+	if (spectre_v4_mitigations_off())
+		ssbs = true;
+	else if (spectre_v4_mitigations_dynamic() && !kthread)
+		ssbs = !test_tsk_thread_flag(tsk, TIF_SSBD);
+
+	__update_pstate_ssbs(regs, ssbs);
+}
+
+/*
+ * The Spectre-v4 mitigation can be controlled via a prctl() from userspace.
+ * This is interesting because the "speculation disabled" behaviour can be
+ * configured so that it is preserved across exec(), which means that the
+ * prctl() may be necessary even when PSTATE.SSBS can be toggled directly
+ * from userspace.
+ */
+static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
 	switch (ctrl) {
 	case PR_SPEC_ENABLE:
-		/* If speculation is force disabled, enable is not allowed */
-		if (state == ARM64_SSBD_FORCE_ENABLE ||
-		    task_spec_ssb_force_disable(task))
+		/* Enable speculation: disable mitigation */
+		/*
+		 * Force disabled speculation prevents it from being
+		 * re-enabled.
+		 */
+		if (task_spec_ssb_force_disable(task))
+			return -EPERM;
+
+		/*
+		 * If the mitigation is forced on, then speculation is forced
+		 * off and we again prevent it from being re-enabled.
+		 */
+		if (spectre_v4_mitigations_on())
 			return -EPERM;
+
 		task_clear_spec_ssb_disable(task);
 		clear_tsk_thread_flag(task, TIF_SSBD);
-		ssbd_ssbs_enable(task);
-		break;
-	case PR_SPEC_DISABLE:
-		if (state == ARM64_SSBD_FORCE_DISABLE)
-			return -EPERM;
-		task_set_spec_ssb_disable(task);
-		set_tsk_thread_flag(task, TIF_SSBD);
-		ssbd_ssbs_disable(task);
 		break;
 	case PR_SPEC_FORCE_DISABLE:
-		if (state == ARM64_SSBD_FORCE_DISABLE)
+		/* Force disable speculation: force enable mitigation */
+		/*
+		 * If the mitigation is forced off, then speculation is forced
+		 * on and we prevent it from being disabled.
+		 */
+		if (spectre_v4_mitigations_off())
 			return -EPERM;
-		task_set_spec_ssb_disable(task);
+
 		task_set_spec_ssb_force_disable(task);
+		fallthrough;
+	case PR_SPEC_DISABLE:
+		/* Disable speculation: enable mitigation */
+		/* Same as PR_SPEC_FORCE_DISABLE */
+		if (spectre_v4_mitigations_off())
+			return -EPERM;
+
+		task_set_spec_ssb_disable(task);
 		set_tsk_thread_flag(task, TIF_SSBD);
-		ssbd_ssbs_disable(task);
 		break;
 	default:
 		return -ERANGE;
 	}
 
+	spectre_v4_enable_task_mitigation(task);
 	return 0;
 }
 
@@ -409,22 +724,32 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
 
 static int ssbd_prctl_get(struct task_struct *task)
 {
-	switch (arm64_get_ssbd_state()) {
-	case ARM64_SSBD_UNKNOWN:
-		return -ENODEV;
-	case ARM64_SSBD_FORCE_ENABLE:
-		return PR_SPEC_DISABLE;
-	case ARM64_SSBD_KERNEL:
-		if (task_spec_ssb_force_disable(task))
-			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
-		if (task_spec_ssb_disable(task))
-			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
-		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
-	case ARM64_SSBD_FORCE_DISABLE:
-		return PR_SPEC_ENABLE;
-	default:
+	switch (spectre_v4_state) {
+	case SPECTRE_UNAFFECTED:
 		return PR_SPEC_NOT_AFFECTED;
+	case SPECTRE_MITIGATED:
+		if (spectre_v4_mitigations_on())
+			return PR_SPEC_NOT_AFFECTED;
+
+		if (spectre_v4_mitigations_dynamic())
+			break;
+
+		/* Mitigations are disabled, so we're vulnerable. */
+		fallthrough;
+	case SPECTRE_VULNERABLE:
+		fallthrough;
+	default:
+		return PR_SPEC_ENABLE;
 	}
+
+	/* Check the mitigation state for this task */
+	if (task_spec_ssb_force_disable(task))
+		return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+
+	if (task_spec_ssb_disable(task))
+		return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+
+	return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
 }
 
 int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index c1dee9066ff971..584c14ce3c860d 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -72,8 +72,7 @@ void notrace __cpu_suspend_exit(void)
 	 * have turned the mitigation on. If the user has forcefully
 	 * disabled it, make sure their wishes are obeyed.
 	 */
-	if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
-		arm64_set_ssbd_mitigation(false);
+	spectre_v4_enable_mitigation(NULL);
 }
 
 /*

From 29e8910a566aad3ee72f729e45842858d51ced8d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 18 Sep 2020 12:25:40 +0100
Subject: [PATCH 208/262] KVM: arm64: Simplify handling of ARCH_WORKAROUND_2

Owing to the fact that the host kernel is always mitigated, we can
drastically simplify the WA2 handling by keeping the mitigation
state ON when entering the guest. This means the guest is either
unaffected or not mitigated.

This results in a nice simplification of the mitigation space,
and the removal of a lot of code that was never really used anyway.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_asm.h        |  3 --
 arch/arm64/include/asm/kvm_emulate.h    | 14 ------
 arch/arm64/include/asm/kvm_mmu.h        | 17 -------
 arch/arm64/include/uapi/asm/kvm.h       |  9 ++++
 arch/arm64/kernel/cpu_errata.c          | 14 ------
 arch/arm64/kernel/image-vars.h          |  2 -
 arch/arm64/kvm/arm.c                    |  4 --
 arch/arm64/kvm/hyp/hyp-entry.S          | 27 -----------
 arch/arm64/kvm/hyp/include/hyp/switch.h | 29 ------------
 arch/arm64/kvm/hyp/nvhe/switch.c        |  4 --
 arch/arm64/kvm/hyp/vhe/switch.c         |  4 --
 arch/arm64/kvm/hypercalls.c             | 14 +++---
 arch/arm64/kvm/psci.c                   | 59 +++++++++++--------------
 arch/arm64/kvm/reset.c                  |  4 --
 14 files changed, 41 insertions(+), 163 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index e9378cc8049d0d..abe02cf6688088 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -9,9 +9,6 @@
 
 #include <asm/virt.h>
 
-#define	VCPU_WORKAROUND_2_FLAG_SHIFT	0
-#define	VCPU_WORKAROUND_2_FLAG		(_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT)
-
 #define ARM_EXIT_WITH_SERROR_BIT  31
 #define ARM_EXCEPTION_CODE(x)	  ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
 #define ARM_EXCEPTION_IS_TRAP(x)  (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 49a55be2b9a20a..96eccb107ec2e7 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -383,20 +383,6 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
 }
 
-static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
-}
-
-static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
-						      bool flag)
-{
-	if (flag)
-		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
-	else
-		vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
-}
-
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 {
 	if (vcpu_mode_is_32bit(vcpu)) {
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 873e12430ac73c..36606ef9e43595 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -526,23 +526,6 @@ static inline int kvm_map_vectors(void)
 }
 #endif
 
-DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
-
-static inline int hyp_map_aux_data(void)
-{
-	int cpu, err;
-
-	for_each_possible_cpu(cpu) {
-		u64 *ptr;
-
-		ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu);
-		err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP);
-		if (err)
-			return err;
-	}
-	return 0;
-}
-
 #define kvm_phys_to_vttbr(addr)		phys_to_ttbr(addr)
 
 /*
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ba85bb23f06017..7d804fd0a6822e 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -242,6 +242,15 @@ struct kvm_vcpu_events {
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL		0
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL		1
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED	2
+
+/*
+ * Only two states can be presented by the host kernel:
+ * - NOT_REQUIRED: the guest doesn't need to do anything
+ * - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available)
+ *
+ * All the other values are deprecated. The host still accepts all
+ * values (they are ABI), but will narrow them to the above two.
+ */
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2	KVM_REG_ARM_FW_REG(2)
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL		0
 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN		1
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 7fc54c3d428509..7e9caef13db43f 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -108,20 +108,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
 
 int ssbd_state __read_mostly = ARM64_SSBD_UNKNOWN;
 
-void __init arm64_enable_wa2_handling(struct alt_instr *alt,
-				      __le32 *origptr, __le32 *updptr,
-				      int nr_inst)
-{
-	BUG_ON(nr_inst != 1);
-	/*
-	 * Only allow mitigation on EL1 entry/exit and guest
-	 * ARCH_WORKAROUND_2 handling if the SSBD state allows it to
-	 * be flipped.
-	 */
-	if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL)
-		*updptr = cpu_to_le32(aarch64_insn_gen_nop());
-}
-
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
 
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 8982b68289b79f..d0f3f35dd0d708 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -64,12 +64,10 @@ __efistub__ctype		= _ctype;
 #define KVM_NVHE_ALIAS(sym) __kvm_nvhe_##sym = sym;
 
 /* Alternative callbacks for init-time patching of nVHE hyp code. */
-KVM_NVHE_ALIAS(arm64_enable_wa2_handling);
 KVM_NVHE_ALIAS(kvm_patch_vector_branch);
 KVM_NVHE_ALIAS(kvm_update_va_mask);
 
 /* Global kernel state accessed by nVHE hyp code. */
-KVM_NVHE_ALIAS(arm64_ssbd_callback_required);
 KVM_NVHE_ALIAS(kvm_host_data);
 KVM_NVHE_ALIAS(kvm_vgic_global_state);
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 46dc3d75cf1359..b1bd2f4cd3e068 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1549,10 +1549,6 @@ static int init_hyp_mode(void)
 		}
 	}
 
-	err = hyp_map_aux_data();
-	if (err)
-		kvm_err("Cannot map host auxiliary data: %d\n", err);
-
 	return 0;
 
 out_err:
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index db2dd750061731..3cca756e9e412c 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -116,33 +116,6 @@ el1_hvc_guest:
 			  ARM_SMCCC_ARCH_WORKAROUND_2)
 	cbnz	w1, el1_trap
 
-alternative_cb	arm64_enable_wa2_handling
-	b	wa2_end
-alternative_cb_end
-	get_vcpu_ptr	x2, x0
-	ldr	x0, [x2, #VCPU_WORKAROUND_FLAGS]
-
-	// Sanitize the argument and update the guest flags
-	ldr	x1, [sp, #8]			// Guest's x1
-	clz	w1, w1				// Murphy's device:
-	lsr	w1, w1, #5			// w1 = !!w1 without using
-	eor	w1, w1, #1			// the flags...
-	bfi	x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1
-	str	x0, [x2, #VCPU_WORKAROUND_FLAGS]
-
-	/* Check that we actually need to perform the call */
-	hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2
-	cbz	x0, wa2_end
-
-	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_2
-	smc	#0
-
-	/* Don't leak data from the SMC call */
-	mov	x3, xzr
-wa2_end:
-	mov	x2, xzr
-	mov	x1, xzr
-
 wa_epilogue:
 	mov	x0, xzr
 	add	sp, sp, #16
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 5dec5589625d11..a6840823b60ef0 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -479,35 +479,6 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	return false;
 }
 
-static inline bool __needs_ssbd_off(struct kvm_vcpu *vcpu)
-{
-	if (!cpus_have_final_cap(ARM64_SPECTRE_V4))
-		return false;
-
-	return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
-}
-
-static inline void __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
-{
-	/*
-	 * The host runs with the workaround always present. If the
-	 * guest wants it disabled, so be it...
-	 */
-	if (__needs_ssbd_off(vcpu) &&
-	    __hyp_this_cpu_read(arm64_ssbd_callback_required))
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
-}
-
-static inline void __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
-{
-	/*
-	 * If the guest has disabled the workaround, bring it back on.
-	 */
-	if (__needs_ssbd_off(vcpu) &&
-	    __hyp_this_cpu_read(arm64_ssbd_callback_required))
-		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
-}
-
 static inline void __kvm_unexpected_el2_exception(void)
 {
 	unsigned long addr, fixup;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 0970442d2dbcf7..8d3dd4f479244c 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -202,8 +202,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	__debug_switch_to_guest(vcpu);
 
-	__set_guest_arch_workaround_state(vcpu);
-
 	do {
 		/* Jump in the fire! */
 		exit_code = __guest_enter(vcpu, host_ctxt);
@@ -211,8 +209,6 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 		/* And we're baaack! */
 	} while (fixup_guest_exit(vcpu, &exit_code));
 
-	__set_host_arch_workaround_state(vcpu);
-
 	__sysreg_save_state_nvhe(guest_ctxt);
 	__sysreg32_save_state(vcpu);
 	__timer_disable_traps(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index c1da4f86ccacac..ecf67e678203f8 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -131,8 +131,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 	sysreg_restore_guest_state_vhe(guest_ctxt);
 	__debug_switch_to_guest(vcpu);
 
-	__set_guest_arch_workaround_state(vcpu);
-
 	do {
 		/* Jump in the fire! */
 		exit_code = __guest_enter(vcpu, host_ctxt);
@@ -140,8 +138,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 		/* And we're baaack! */
 	} while (fixup_guest_exit(vcpu, &exit_code));
 
-	__set_host_arch_workaround_state(vcpu);
-
 	sysreg_save_guest_state_vhe(guest_ctxt);
 
 	__deactivate_traps(vcpu);
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 413d46b9bc07ca..69e023dfafce1c 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -36,15 +36,13 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 			}
 			break;
 		case ARM_SMCCC_ARCH_WORKAROUND_2:
-			switch (kvm_arm_have_ssbd()) {
-			case KVM_SSBD_FORCE_DISABLE:
-			case KVM_SSBD_UNKNOWN:
+			switch (arm64_get_ssbd_state()) {
+			case ARM64_SSBD_FORCE_DISABLE:
+			case ARM64_SSBD_UNKNOWN:
 				break;
-			case KVM_SSBD_KERNEL:
-				val = SMCCC_RET_SUCCESS;
-				break;
-			case KVM_SSBD_FORCE_ENABLE:
-			case KVM_SSBD_MITIGATED:
+			case ARM64_SSBD_KERNEL:
+			case ARM64_SSBD_FORCE_ENABLE:
+			case ARM64_SSBD_MITIGATED:
 				val = SMCCC_RET_NOT_REQUIRED;
 				break;
 			}
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index fbdd6f3bea7f14..87e6e3818fb584 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -435,17 +435,15 @@ static int get_kernel_wa_level(u64 regid)
 		}
 		return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
-		switch (kvm_arm_have_ssbd()) {
-		case KVM_SSBD_FORCE_DISABLE:
-			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
-		case KVM_SSBD_KERNEL:
-			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
-		case KVM_SSBD_FORCE_ENABLE:
-		case KVM_SSBD_MITIGATED:
+		switch (arm64_get_ssbd_state()) {
+		case ARM64_SSBD_FORCE_ENABLE:
+		case ARM64_SSBD_MITIGATED:
+		case ARM64_SSBD_KERNEL:
 			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
-		case KVM_SSBD_UNKNOWN:
+		case ARM64_SSBD_UNKNOWN:
+		case ARM64_SSBD_FORCE_DISABLE:
 		default:
-			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
+			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
 		}
 	}
 
@@ -462,14 +460,8 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 		val = kvm_psci_version(vcpu, vcpu->kvm);
 		break;
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
-		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
-		break;
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
 		val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
-
-		if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
-		    kvm_arm_get_vcpu_workaround_2_flag(vcpu))
-			val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
 		break;
 	default:
 		return -ENOENT;
@@ -527,34 +519,35 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 			    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
 			return -EINVAL;
 
-		wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
-
-		if (get_kernel_wa_level(reg->id) < wa_level)
-			return -EINVAL;
-
 		/* The enabled bit must not be set unless the level is AVAIL. */
-		if (wa_level != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
-		    wa_level != val)
+		if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) &&
+		    (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL)
 			return -EINVAL;
 
-		/* Are we finished or do we need to check the enable bit ? */
-		if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL)
-			return 0;
-
 		/*
-		 * If this kernel supports the workaround to be switched on
-		 * or off, make sure it matches the requested setting.
+		 * Map all the possible incoming states to the only two we
+		 * really want to deal with.
 		 */
-		switch (wa_level) {
-		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
-			kvm_arm_set_vcpu_workaround_2_flag(vcpu,
-			    val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED);
+		switch (val & KVM_REG_FEATURE_LEVEL_MASK) {
+		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
+		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
+			wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
 			break;
+		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
 		case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
-			kvm_arm_set_vcpu_workaround_2_flag(vcpu, true);
+			wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
 			break;
+		default:
+			return -EINVAL;
 		}
 
+		/*
+		 * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
+		 * other way around.
+		 */
+		if (get_kernel_wa_level(reg->id) < wa_level)
+			return -EINVAL;
+
 		return 0;
 	default:
 		return -ENOENT;
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index ee33875c5c2af9..f6e8b4a75cbbe2 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -319,10 +319,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 		vcpu->arch.reset_state.reset = false;
 	}
 
-	/* Default workaround setup is enabled (if supported) */
-	if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
-		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
-
 	/* Reset timer */
 	ret = kvm_timer_vcpu_reset(vcpu);
 out:

From 7311467702710cc30ac4e3a6c6670a766e7667f9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 18 Sep 2020 13:59:32 +0100
Subject: [PATCH 209/262] KVM: arm64: Get rid of kvm_arm_have_ssbd()

kvm_arm_have_ssbd() is now completely unused, get rid of it.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_host.h | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 59f6cff0f0cb0d..8785ebf9bc2921 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -631,29 +631,6 @@ static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {}
 static inline void kvm_clr_pmu_events(u32 clr) {}
 #endif
 
-#define KVM_SSBD_UNKNOWN		-1
-#define KVM_SSBD_FORCE_DISABLE		0
-#define KVM_SSBD_KERNEL		1
-#define KVM_SSBD_FORCE_ENABLE		2
-#define KVM_SSBD_MITIGATED		3
-
-static inline int kvm_arm_have_ssbd(void)
-{
-	switch (arm64_get_ssbd_state()) {
-	case ARM64_SSBD_FORCE_DISABLE:
-		return KVM_SSBD_FORCE_DISABLE;
-	case ARM64_SSBD_KERNEL:
-		return KVM_SSBD_KERNEL;
-	case ARM64_SSBD_FORCE_ENABLE:
-		return KVM_SSBD_FORCE_ENABLE;
-	case ARM64_SSBD_MITIGATED:
-		return KVM_SSBD_MITIGATED;
-	case ARM64_SSBD_UNKNOWN:
-	default:
-		return KVM_SSBD_UNKNOWN;
-	}
-}
-
 void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
 void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu);
 

From d63d975a71b332df36cc802e6e77a462af6b9fef Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 18 Sep 2020 14:08:54 +0100
Subject: [PATCH 210/262] KVM: arm64: Convert ARCH_WORKAROUND_2 to
 arm64_get_spectre_v4_state()

Convert the KVM WA2 code to using the Spectre infrastructure,
making the code much more readable. It also allows us to
take SSBS into account for the mitigation.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/arm.c        |  2 +-
 arch/arm64/kvm/hypercalls.c | 23 +++++++++++++++++------
 arch/arm64/kvm/psci.c       | 19 ++++++++++++-------
 3 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index b1bd2f4cd3e068..9b90a701462c21 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1292,7 +1292,7 @@ static void cpu_init_hyp_mode(void)
 	 * at EL2.
 	 */
 	if (this_cpu_has_cap(ARM64_SSBS) &&
-	    arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) {
+	    arm64_get_spectre_v4_state() == SPECTRE_VULNERABLE) {
 		kvm_call_hyp_nvhe(__kvm_enable_ssbs);
 	}
 }
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 69e023dfafce1c..9824025ccc5c04 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -36,13 +36,24 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 			}
 			break;
 		case ARM_SMCCC_ARCH_WORKAROUND_2:
-			switch (arm64_get_ssbd_state()) {
-			case ARM64_SSBD_FORCE_DISABLE:
-			case ARM64_SSBD_UNKNOWN:
+			switch (arm64_get_spectre_v4_state()) {
+			case SPECTRE_VULNERABLE:
 				break;
-			case ARM64_SSBD_KERNEL:
-			case ARM64_SSBD_FORCE_ENABLE:
-			case ARM64_SSBD_MITIGATED:
+			case SPECTRE_MITIGATED:
+				/*
+				 * SSBS everywhere: Indicate no firmware
+				 * support, as the SSBS support will be
+				 * indicated to the guest and the default is
+				 * safe.
+				 *
+				 * Otherwise, expose a permanent mitigation
+				 * to the guest, and hide SSBS so that the
+				 * guest stays protected.
+				 */
+				if (cpus_have_final_cap(ARM64_SSBS))
+					break;
+				fallthrough;
+			case SPECTRE_UNAFFECTED:
 				val = SMCCC_RET_NOT_REQUIRED;
 				break;
 			}
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 87e6e3818fb584..db4056ecccfda9 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -435,14 +435,19 @@ static int get_kernel_wa_level(u64 regid)
 		}
 		return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
 	case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
-		switch (arm64_get_ssbd_state()) {
-		case ARM64_SSBD_FORCE_ENABLE:
-		case ARM64_SSBD_MITIGATED:
-		case ARM64_SSBD_KERNEL:
+		switch (arm64_get_spectre_v4_state()) {
+		case SPECTRE_MITIGATED:
+			/*
+			 * As for the hypercall discovery, we pretend we
+			 * don't have any FW mitigation if SSBS is there at
+			 * all times.
+			 */
+			if (cpus_have_final_cap(ARM64_SSBS))
+				return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
+			fallthrough;
+		case SPECTRE_UNAFFECTED:
 			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
-		case ARM64_SSBD_UNKNOWN:
-		case ARM64_SSBD_FORCE_DISABLE:
-		default:
+		case SPECTRE_VULNERABLE:
 			return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
 		}
 	}

From 31c84d6c9cde673b3866972552ec52e4c522b4fa Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 18 Sep 2020 14:11:25 +0100
Subject: [PATCH 211/262] arm64: Get rid of arm64_ssbd_state

Out with the old ghost, in with the new...

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpufeature.h | 14 --------------
 arch/arm64/kernel/cpu_errata.c      |  2 --
 2 files changed, 16 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 3b48aa121cee81..fba6700b457b7b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -698,20 +698,6 @@ static inline bool system_supports_tlb_range(void)
 		cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
 }
 
-#define ARM64_SSBD_UNKNOWN		-1
-#define ARM64_SSBD_FORCE_DISABLE	0
-#define ARM64_SSBD_KERNEL		1
-#define ARM64_SSBD_FORCE_ENABLE		2
-#define ARM64_SSBD_MITIGATED		3
-
-static inline int arm64_get_ssbd_state(void)
-{
-	extern int ssbd_state;
-	return ssbd_state;
-}
-
-void arm64_set_ssbd_mitigation(bool state);
-
 extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
 
 static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 7e9caef13db43f..6c8303559bebe2 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -106,8 +106,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
 		sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
 }
 
-int ssbd_state __read_mostly = ARM64_SSBD_UNKNOWN;
-
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
 

From 9ef2b48be9bba70ded9372fc81e678e707306060 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 28 Sep 2020 11:45:24 +0100
Subject: [PATCH 212/262] KVM: arm64: Allow patching EL2 vectors even with
 KASLR is not enabled

Patching the EL2 exception vectors is integral to the Spectre-v2
workaround, where it can be necessary to execute CPU-specific sequences
to nobble the branch predictor before running the hypervisor text proper.

Remove the dependency on CONFIG_RANDOMIZE_BASE and allow the EL2 vectors
to be patched even when KASLR is not enabled.

Fixes: 7a132017e7a5 ("KVM: arm64: Replace CONFIG_KVM_INDIRECT_VECTORS with CONFIG_RANDOMIZE_BASE")
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/202009221053.Jv1XsQUZ%lkp@intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kvm_asm.h |  2 --
 arch/arm64/include/asm/kvm_mmu.h | 51 +-------------------------------
 arch/arm64/kernel/proton-pack.c  |  2 --
 arch/arm64/kvm/arm.c             | 34 +++++++++++++++++++++
 arch/arm64/kvm/hyp/Makefile      |  3 +-
 arch/arm64/kvm/hyp/hyp-entry.S   |  2 --
 6 files changed, 36 insertions(+), 58 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index abe02cf6688088..7f7072f6cb45f6 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -99,11 +99,9 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
 #define __kvm_hyp_init		CHOOSE_NVHE_SYM(__kvm_hyp_init)
 #define __kvm_hyp_vector	CHOOSE_HYP_SYM(__kvm_hyp_vector)
 
-#ifdef CONFIG_RANDOMIZE_BASE
 extern atomic_t arm64_el2_vector_last_slot;
 DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
 #define __bp_harden_hyp_vecs	CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
-#endif
 
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 36606ef9e43595..cff1cebc759048 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -9,6 +9,7 @@
 
 #include <asm/page.h>
 #include <asm/memory.h>
+#include <asm/mmu.h>
 #include <asm/cpufeature.h>
 
 /*
@@ -430,7 +431,6 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
 	return ret;
 }
 
-#ifdef CONFIG_RANDOMIZE_BASE
 /*
  * EL2 vectors can be mapped and rerouted in a number of ways,
  * depending on the kernel configuration and CPU present:
@@ -451,12 +451,9 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
  * VHE, as we don't have hypervisor-specific mappings. If the system
  * is VHE and yet selects this capability, it will be ignored.
  */
-#include <asm/mmu.h>
-
 extern void *__kvm_bp_vect_base;
 extern int __kvm_harden_el2_vector_slot;
 
-/*  This is called on both VHE and !VHE systems */
 static inline void *kvm_get_hyp_vector(void)
 {
 	struct bp_hardening_data *data = arm64_get_bp_hardening_data();
@@ -480,52 +477,6 @@ static inline void *kvm_get_hyp_vector(void)
 	return vect;
 }
 
-/*  This is only called on a !VHE system */
-static inline int kvm_map_vectors(void)
-{
-	/*
-	 * SV2  = ARM64_SPECTRE_V2
-	 * HEL2 = ARM64_HARDEN_EL2_VECTORS
-	 *
-	 * !SV2 + !HEL2 -> use direct vectors
-	 *  SV2 + !HEL2 -> use hardened vectors in place
-	 * !SV2 +  HEL2 -> allocate one vector slot and use exec mapping
-	 *  SV2 +  HEL2 -> use hardened vertors and use exec mapping
-	 */
-	if (cpus_have_const_cap(ARM64_SPECTRE_V2)) {
-		__kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs);
-		__kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
-	}
-
-	if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
-		phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs);
-		unsigned long size = __BP_HARDEN_HYP_VECS_SZ;
-
-		/*
-		 * Always allocate a spare vector slot, as we don't
-		 * know yet which CPUs have a BP hardening slot that
-		 * we can reuse.
-		 */
-		__kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot);
-		BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS);
-		return create_hyp_exec_mappings(vect_pa, size,
-						&__kvm_bp_vect_base);
-	}
-
-	return 0;
-}
-#else
-static inline void *kvm_get_hyp_vector(void)
-{
-	return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
-}
-
-static inline int kvm_map_vectors(void)
-{
-	return 0;
-}
-#endif
-
 #define kvm_phys_to_vttbr(addr)		phys_to_ttbr(addr)
 
 /*
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index b1ea935fd9481f..1fbaa0240d4c1d 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -177,7 +177,6 @@ enum mitigation_state arm64_get_spectre_v2_state(void)
 }
 
 #ifdef CONFIG_KVM
-#ifdef CONFIG_RANDOMIZE_BASE
 #include <asm/cacheflush.h>
 #include <asm/kvm_asm.h>
 
@@ -235,7 +234,6 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn)
 {
 	__this_cpu_write(bp_hardening_data.fn, fn);
 }
-#endif	/* CONFIG_RANDOMIZE_BASE */
 #endif	/* CONFIG_KVM */
 
 static void call_smc_arch_workaround_1(void)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9b90a701462c21..13e559ac7235b1 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1256,6 +1256,40 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 }
 
+static int kvm_map_vectors(void)
+{
+	/*
+	 * SV2  = ARM64_SPECTRE_V2
+	 * HEL2 = ARM64_HARDEN_EL2_VECTORS
+	 *
+	 * !SV2 + !HEL2 -> use direct vectors
+	 *  SV2 + !HEL2 -> use hardened vectors in place
+	 * !SV2 +  HEL2 -> allocate one vector slot and use exec mapping
+	 *  SV2 +  HEL2 -> use hardened vectors and use exec mapping
+	 */
+	if (cpus_have_const_cap(ARM64_SPECTRE_V2)) {
+		__kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs);
+		__kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
+	}
+
+	if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
+		phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs);
+		unsigned long size = __BP_HARDEN_HYP_VECS_SZ;
+
+		/*
+		 * Always allocate a spare vector slot, as we don't
+		 * know yet which CPUs have a BP hardening slot that
+		 * we can reuse.
+		 */
+		__kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+		BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS);
+		return create_hyp_exec_mappings(vect_pa, size,
+						&__kvm_bp_vect_base);
+	}
+
+	return 0;
+}
+
 static void cpu_init_hyp_mode(void)
 {
 	phys_addr_t pgd_ptr;
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 89d2bf73acb518..d898f0da5802e3 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -10,5 +10,4 @@ subdir-ccflags-y := -I$(incdir)				\
 		    -DDISABLE_BRANCH_PROFILING		\
 		    $(DISABLE_STACKLEAK_PLUGIN)
 
-obj-$(CONFIG_KVM) += vhe/ nvhe/
-obj-$(CONFIG_RANDOMIZE_BASE) += smccc_wa.o
+obj-$(CONFIG_KVM) += vhe/ nvhe/ smccc_wa.o
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 3cca756e9e412c..7ea277b829673a 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -259,7 +259,6 @@ SYM_CODE_START(__kvm_hyp_vector)
 	valid_vect	el1_error		// Error 32-bit EL1
 SYM_CODE_END(__kvm_hyp_vector)
 
-#ifdef CONFIG_RANDOMIZE_BASE
 .macro hyp_ventry
 	.align 7
 1:	esb
@@ -309,4 +308,3 @@ SYM_CODE_START(__bp_harden_hyp_vecs)
 1:	.org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ
 	.org 1b
 SYM_CODE_END(__bp_harden_hyp_vecs)
-#endif

From 5c8b0cbd9d6bac5f40943b5a7d8eac8cb86cbe7f Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 28 Sep 2020 14:06:50 +0100
Subject: [PATCH 213/262] arm64: Pull in task_stack_page() to Spectre-v4
 mitigation code

The kbuild robot reports that we're relying on an implicit inclusion to
get a definition of task_stack_page() in the Spectre-v4 mitigation code,
which is not always in place for some configurations:

  | arch/arm64/kernel/proton-pack.c:329:2: error: implicit declaration of function 'task_stack_page' [-Werror,-Wimplicit-function-declaration]
  |         task_pt_regs(task)->pstate |= val;
  |         ^
  | arch/arm64/include/asm/processor.h:268:36: note: expanded from macro 'task_pt_regs'
  |         ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
  |                                           ^
  | arch/arm64/kernel/proton-pack.c:329:2: note: did you mean 'task_spread_page'?

Add the missing include to fix the build error.

Fixes: a44acf477220 ("arm64: Move SSBD prctl() handler alongside other spectre mitigation code")
Reported-by: Anthony Steinhauser <asteinhauser@google.com>
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/202009260013.Ul7AD29w%lkp@intel.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/proton-pack.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 1fbaa0240d4c1d..16c9a095a74684 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -22,6 +22,7 @@
 #include <linux/device.h>
 #include <linux/nospec.h>
 #include <linux/prctl.h>
+#include <linux/sched/task_stack.h>
 
 #include <asm/spectre.h>
 #include <asm/traps.h>

From 780c083a8f840ca9162c7a4090ff5e10d15152a2 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 28 Sep 2020 14:03:00 +0100
Subject: [PATCH 214/262] arm64: Add support for PR_SPEC_DISABLE_NOEXEC prctl()
 option

The PR_SPEC_DISABLE_NOEXEC option to the PR_SPEC_STORE_BYPASS prctl()
allows the SSB mitigation to be enabled only until the next execve(),
at which point the state will revert back to PR_SPEC_ENABLE and the
mitigation will be disabled.

Add support for PR_SPEC_DISABLE_NOEXEC on arm64.

Reported-by: Anthony Steinhauser <asteinhauser@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/process.c     |  6 ++++++
 arch/arm64/kernel/proton-pack.c | 38 +++++++++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 9dbd35b95253a7..085d8ca39e47df 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -21,6 +21,7 @@
 #include <linux/lockdep.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/nospec.h>
 #include <linux/stddef.h>
 #include <linux/sysctl.h>
 #include <linux/unistd.h>
@@ -609,6 +610,11 @@ void arch_setup_new_exec(void)
 	current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
 
 	ptrauth_thread_init_user(current);
+
+	if (task_spec_ssb_noexec(current)) {
+		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
+					 PR_SPEC_ENABLE);
+	}
 }
 
 #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c
index 16c9a095a74684..68b710f1b43f34 100644
--- a/arch/arm64/kernel/proton-pack.c
+++ b/arch/arm64/kernel/proton-pack.c
@@ -660,6 +660,20 @@ void spectre_v4_enable_task_mitigation(struct task_struct *tsk)
  * prctl() may be necessary even when PSTATE.SSBS can be toggled directly
  * from userspace.
  */
+static void ssbd_prctl_enable_mitigation(struct task_struct *task)
+{
+	task_clear_spec_ssb_noexec(task);
+	task_set_spec_ssb_disable(task);
+	set_tsk_thread_flag(task, TIF_SSBD);
+}
+
+static void ssbd_prctl_disable_mitigation(struct task_struct *task)
+{
+	task_clear_spec_ssb_noexec(task);
+	task_clear_spec_ssb_disable(task);
+	clear_tsk_thread_flag(task, TIF_SSBD);
+}
+
 static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
 {
 	switch (ctrl) {
@@ -679,8 +693,7 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
 		if (spectre_v4_mitigations_on())
 			return -EPERM;
 
-		task_clear_spec_ssb_disable(task);
-		clear_tsk_thread_flag(task, TIF_SSBD);
+		ssbd_prctl_disable_mitigation(task);
 		break;
 	case PR_SPEC_FORCE_DISABLE:
 		/* Force disable speculation: force enable mitigation */
@@ -699,8 +712,22 @@ static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl)
 		if (spectre_v4_mitigations_off())
 			return -EPERM;
 
-		task_set_spec_ssb_disable(task);
-		set_tsk_thread_flag(task, TIF_SSBD);
+		ssbd_prctl_enable_mitigation(task);
+		break;
+	case PR_SPEC_DISABLE_NOEXEC:
+		/* Disable speculation until execve(): enable mitigation */
+		/*
+		 * If the mitigation state is forced one way or the other, then
+		 * we must fail now before we try to toggle it on execve().
+		 */
+		if (task_spec_ssb_force_disable(task) ||
+		    spectre_v4_mitigations_off() ||
+		    spectre_v4_mitigations_on()) {
+			return -EPERM;
+		}
+
+		ssbd_prctl_enable_mitigation(task);
+		task_set_spec_ssb_noexec(task);
 		break;
 	default:
 		return -ERANGE;
@@ -745,6 +772,9 @@ static int ssbd_prctl_get(struct task_struct *task)
 	if (task_spec_ssb_force_disable(task))
 		return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
 
+	if (task_spec_ssb_noexec(task))
+		return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC;
+
 	if (task_spec_ssb_disable(task))
 		return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
 

From fd258dc4442c5c1c069c6b5b42bfe7d10cddda95 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Tue, 29 Sep 2020 19:13:12 -0700
Subject: [PATCH 215/262] x86/mce: Add Skylake quirk for patrol scrub reported
 errors

The patrol scrubber in Skylake and Cascade Lake systems can be configured
to report uncorrected errors using a special signature in the machine
check bank and to signal using CMCI instead of machine check.

Update the severity calculation mechanism to allow specifying the model,
minimum stepping and range of machine check bank numbers.

Add a new rule to detect the special signature (on model 0x55, stepping
>=4 in any of the memory controller banks).

 [ bp: Rewrite it.
   aegl: Productize it. ]

Suggested-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Co-developed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200930021313.31810-2-tony.luck@intel.com
---
 arch/x86/kernel/cpu/mce/severity.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index e1da619add1921..567ce09a02868d 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -9,9 +9,11 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/debugfs.h>
-#include <asm/mce.h>
 #include <linux/uaccess.h>
 
+#include <asm/mce.h>
+#include <asm/intel-family.h>
+
 #include "internal.h"
 
 /*
@@ -40,9 +42,14 @@ static struct severity {
 	unsigned char context;
 	unsigned char excp;
 	unsigned char covered;
+	unsigned char cpu_model;
+	unsigned char cpu_minstepping;
+	unsigned char bank_lo, bank_hi;
 	char *msg;
 } severities[] = {
 #define MCESEV(s, m, c...) { .sev = MCE_ ## s ## _SEVERITY, .msg = m, ## c }
+#define BANK_RANGE(l, h) .bank_lo = l, .bank_hi = h
+#define MODEL_STEPPING(m, s) .cpu_model = m, .cpu_minstepping = s
 #define  KERNEL		.context = IN_KERNEL
 #define  USER		.context = IN_USER
 #define  KERNEL_RECOV	.context = IN_KERNEL_RECOV
@@ -97,7 +104,6 @@ static struct severity {
 		KEEP, "Corrected error",
 		NOSER, BITCLR(MCI_STATUS_UC)
 		),
-
 	/*
 	 * known AO MCACODs reported via MCE or CMC:
 	 *
@@ -113,6 +119,18 @@ static struct severity {
 		AO, "Action optional: last level cache writeback error",
 		SER, MASK(MCI_UC_AR|MCACOD, MCI_STATUS_UC|MCACOD_L3WB)
 		),
+	/*
+	 * Quirk for Skylake/Cascade Lake. Patrol scrubber may be configured
+	 * to report uncorrected errors using CMCI with a special signature.
+	 * UC=0, MSCOD=0x0010, MCACOD=binary(000X 0000 1100 XXXX) reported
+	 * in one of the memory controller banks.
+	 * Set severity to "AO" for same action as normal patrol scrub error.
+	 */
+	MCESEV(
+		AO, "Uncorrected Patrol Scrub Error",
+		SER, MASK(MCI_STATUS_UC|MCI_ADDR|0xffffeff0, MCI_ADDR|0x001000c0),
+		MODEL_STEPPING(INTEL_FAM6_SKYLAKE_X, 4), BANK_RANGE(13, 18)
+	),
 
 	/* ignore OVER for UCNA */
 	MCESEV(
@@ -324,6 +342,12 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
 			continue;
 		if (s->excp && excp != s->excp)
 			continue;
+		if (s->cpu_model && boot_cpu_data.x86_model != s->cpu_model)
+			continue;
+		if (s->cpu_minstepping && boot_cpu_data.x86_stepping < s->cpu_minstepping)
+			continue;
+		if (s->bank_lo && (m->bank < s->bank_lo || m->bank > s->bank_hi))
+			continue;
 		if (msg)
 			*msg = s->msg;
 		s->covered = 1;

From ed9705e4ad1c19ae51ed0cb4c112f9eb6dfc69fc Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Tue, 29 Sep 2020 19:13:13 -0700
Subject: [PATCH 216/262] x86/mce: Drop AMD-specific "DEFERRED" case from Intel
 severity rule list

Way back in v3.19 Intel and AMD shared the same machine check severity
grading code. So it made sense to add a case for AMD DEFERRED errors in
commit

  e3480271f592 ("x86, mce, severity: Extend the the mce_severity mechanism to handle UCNA/DEFERRED error")

But later in v4.2 AMD switched to a separate grading function in
commit

  bf80bbd7dcf5 ("x86/mce: Add an AMD severities-grading function")

Belatedly drop the DEFERRED case from the Intel rule list.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20200930021313.31810-3-tony.luck@intel.com
---
 arch/x86/kernel/cpu/mce/severity.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 567ce09a02868d..e0722461bb579a 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -96,10 +96,6 @@ static struct severity {
 		PANIC, "In kernel and no restart IP",
 		EXCP, KERNEL_RECOV, MCGMASK(MCG_STATUS_RIPV, 0)
 		),
-	MCESEV(
-		DEFERRED, "Deferred error",
-		NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
-		),
 	MCESEV(
 		KEEP, "Corrected error",
 		NOSER, BITCLR(MCI_STATUS_UC)

From 6a1bdb173f9967b2329aab0f25bcba963f54e06b Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Wed, 30 Sep 2020 13:20:40 +0100
Subject: [PATCH 217/262] arm64: mm: Make flush_tlb_fix_spurious_fault() a
 no-op

Our use of broadcast TLB maintenance means that spurious page-faults
that have been handled already by another CPU do not require additional
TLB maintenance.

Make flush_tlb_fix_spurious_fault() a no-op and rely on the existing TLB
invalidation instead. Add an explicit flush_tlb_page() when making a page
dirty, as the TLB is permitted to cache the old read-only entry.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20200728092220.GA21800@willie-the-truck
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pgtable.h | 8 ++++++++
 arch/arm64/mm/fault.c            | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index bc68da9f5706e4..02ad3105c14c62 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -50,6 +50,14 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 	__flush_tlb_range(vma, addr, end, PUD_SIZE, false, 1)
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+/*
+ * Outside of a few very special situations (e.g. hibernation), we always
+ * use broadcast TLB invalidation instructions, therefore a spurious page
+ * fault on one CPU which has been handled concurrently by another CPU
+ * does not need to perform additional invalidation.
+ */
+#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
+
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index f07333e86c2ff9..a696a7921da466 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -218,7 +218,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
 	} while (pteval != old_pteval);
 
-	flush_tlb_fix_spurious_fault(vma, address);
+	/* Invalidate a stale read-only entry */
+	if (dirty)
+		flush_tlb_page(vma, address);
 	return 1;
 }
 

From 80d6b466679c3dced3b359a6379c6d913de39afd Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 1 Oct 2020 09:48:21 +0100
Subject: [PATCH 218/262] arm64: dbm: Invalidate local TLB when setting
 TCR_EL1.HD

TCR_EL1.HD is permitted to be cached in a TLB, so invalidate the local
TLB after setting the bit when detected support for the feature. Although
this isn't strictly necessary, since we can happily operate with the bit
effectively clear, the current code uses an ISB in a half-hearted attempt
to make the change effective, so let's just fix that up.

Link: https://lore.kernel.org/r/20201001110405.18617-1-will@kernel.org
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6424584be01e6d..a474a4f39c951b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1443,6 +1443,7 @@ static inline void __cpu_enable_hw_dbm(void)
 
 	write_sysreg(tcr, tcr_el1);
 	isb();
+	local_flush_tlb_all();
 }
 
 static bool cpu_has_broken_dbm(void)

From f94c91f7ba3ba7de2bc8aa31be28e1abb22f849e Mon Sep 17 00:00:00 2001
From: Libing Zhou <libing.zhou@nokia-sbell.com>
Date: Thu, 20 Aug 2020 10:56:41 +0800
Subject: [PATCH 219/262] x86/nmi: Fix nmi_handle() duration miscalculation

When nmi_check_duration() is checking the time an NMI handler took to
execute, the whole_msecs value used should be read from the @duration
argument, not from the ->max_duration, the latter being used to store
the current maximal duration.

 [ bp: Rewrite commit message. ]

Fixes: 248ed51048c4 ("x86/nmi: Remove irq_work from the long duration NMI handler")
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Libing Zhou <libing.zhou@nokia-sbell.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Changbin Du <changbin.du@gmail.com>
Link: https://lkml.kernel.org/r/20200820025641.44075-1-libing.zhou@nokia-sbell.com
---
 arch/x86/kernel/nmi.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 4fc9954a956003..47381666d6a55b 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -102,7 +102,6 @@ fs_initcall(nmi_warning_debugfs);
 
 static void nmi_check_duration(struct nmiaction *action, u64 duration)
 {
-	u64 whole_msecs = READ_ONCE(action->max_duration);
 	int remainder_ns, decimal_msecs;
 
 	if (duration < nmi_longest_ns || duration < action->max_duration)
@@ -110,12 +109,12 @@ static void nmi_check_duration(struct nmiaction *action, u64 duration)
 
 	action->max_duration = duration;
 
-	remainder_ns = do_div(whole_msecs, (1000 * 1000));
+	remainder_ns = do_div(duration, (1000 * 1000));
 	decimal_msecs = remainder_ns / 1000;
 
 	printk_ratelimited(KERN_INFO
 		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
-		action->handler, whole_msecs, decimal_msecs);
+		action->handler, duration, decimal_msecs);
 }
 
 static int nmi_handle(unsigned int type, struct pt_regs *regs)

From a0947081af2ac9549e6ba19877456730713bde23 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 1 Oct 2020 09:56:08 -0500
Subject: [PATCH 220/262] x86/uv/time: Use a flexible array in struct
 uv_rtc_timer_head
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having
a dynamically sized set of trailing elements in a structure. Kernel code
should always use “flexible array members”[1] for these cases. The
older style of one-element or zero-length arrays should no longer be
used[2].

struct uv_rtc_timer_head contains a one-element array cpu[1]. Switch it
to a flexible array and use the struct_size() helper to calculate the
allocation size. Also, save some heap space in the process[3].

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.9-rc1/process/deprecated.html#zero-length-and-one-element-arrays
[3] https://lore.kernel.org/lkml/20200518190114.GA7757@embeddedor/

 [ bp: Massage a bit. ]

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201001145608.GA10204@embeddedor
---
 arch/x86/platform/uv/uv_time.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index f82a1337a608d6..6c348c2d0def56 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -52,7 +52,7 @@ struct uv_rtc_timer_head {
 	struct {
 		int	lcpu;		/* systemwide logical cpu number */
 		u64	expires;	/* next timer expiration for this cpu */
-	} cpu[1];
+	} cpu[];
 };
 
 /*
@@ -148,9 +148,8 @@ static __init int uv_rtc_allocate_timers(void)
 		struct uv_rtc_timer_head *head = blade_info[bid];
 
 		if (!head) {
-			head = kmalloc_node(sizeof(struct uv_rtc_timer_head) +
-				(uv_blade_nr_possible_cpus(bid) *
-					2 * sizeof(u64)),
+			head = kmalloc_node(struct_size(head, cpu,
+				uv_blade_nr_possible_cpus(bid)),
 				GFP_KERNEL, nid);
 			if (!head) {
 				uv_rtc_deallocate_timers();

From d5ae56a4fe5178e289e763450362d87800f9dcac Mon Sep 17 00:00:00 2001
From: Masahisa Kojima <masahisa.kojima@linaro.org>
Date: Tue, 28 Jul 2020 12:14:31 +0900
Subject: [PATCH 221/262] tpm: tis: add support for MMIO TPM on SynQuacer

When fitted, the SynQuacer platform exposes its SPI TPM via a MMIO
window that is backed by the SPI command sequencer in the SPI bus
controller. This arrangement has the limitation that only byte size
accesses are supported, and so we'll need to provide a separate module
that take this into account.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Masahisa Kojima <masahisa.kojima@linaro.org>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/Kconfig             |  12 ++
 drivers/char/tpm/Makefile            |   1 +
 drivers/char/tpm/tpm_tis_synquacer.c | 208 +++++++++++++++++++++++++++
 3 files changed, 221 insertions(+)
 create mode 100644 drivers/char/tpm/tpm_tis_synquacer.c

diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 58b4c573d1762a..a18c314da21118 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -74,6 +74,18 @@ config TCG_TIS_SPI_CR50
 	  If you have a H1 secure module running Cr50 firmware on SPI bus,
 	  say Yes and it will be accessible from within Linux.
 
+config TCG_TIS_SYNQUACER
+	tristate "TPM Interface Specification 1.2 Interface / TPM 2.0 FIFO Interface (MMIO - SynQuacer)"
+	depends on ARCH_SYNQUACER
+	select TCG_TIS_CORE
+	help
+	  If you have a TPM security chip that is compliant with the
+	  TCG TIS 1.2 TPM specification (TPM1.2) or the TCG PTP FIFO
+	  specification (TPM2.0) say Yes and it will be accessible from
+	  within Linux on Socionext SynQuacer platform.
+	  To compile this driver as a module, choose  M here;
+	  the module will be called tpm_tis_synquacer.
+
 config TCG_TIS_I2C_ATMEL
 	tristate "TPM Interface Specification 1.2 Interface (I2C - Atmel)"
 	depends on I2C
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index 9567e5197f740f..84db4fb3a9c92d 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -21,6 +21,7 @@ tpm-$(CONFIG_EFI) += eventlog/efi.o
 tpm-$(CONFIG_OF) += eventlog/of.o
 obj-$(CONFIG_TCG_TIS_CORE) += tpm_tis_core.o
 obj-$(CONFIG_TCG_TIS) += tpm_tis.o
+obj-$(CONFIG_TCG_TIS_SYNQUACER) += tpm_tis_synquacer.o
 
 obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi.o
 tpm_tis_spi-y := tpm_tis_spi_main.o
diff --git a/drivers/char/tpm/tpm_tis_synquacer.c b/drivers/char/tpm/tpm_tis_synquacer.c
new file mode 100644
index 00000000000000..e47bdd27270492
--- /dev/null
+++ b/drivers/char/tpm/tpm_tis_synquacer.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Linaro Ltd.
+ *
+ * This device driver implements MMIO TPM on SynQuacer Platform.
+ */
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/kernel.h>
+#include "tpm.h"
+#include "tpm_tis_core.h"
+
+/*
+ * irq > 0 means: use irq $irq;
+ * irq = 0 means: autoprobe for an irq;
+ * irq = -1 means: no irq support
+ */
+struct tpm_tis_synquacer_info {
+	struct resource res;
+	int irq;
+};
+
+struct tpm_tis_synquacer_phy {
+	struct tpm_tis_data priv;
+	void __iomem *iobase;
+};
+
+static inline struct tpm_tis_synquacer_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *data)
+{
+	return container_of(data, struct tpm_tis_synquacer_phy, priv);
+}
+
+static int tpm_tis_synquacer_read_bytes(struct tpm_tis_data *data, u32 addr,
+					u16 len, u8 *result)
+{
+	struct tpm_tis_synquacer_phy *phy = to_tpm_tis_tcg_phy(data);
+
+	while (len--)
+		*result++ = ioread8(phy->iobase + addr);
+
+	return 0;
+}
+
+static int tpm_tis_synquacer_write_bytes(struct tpm_tis_data *data, u32 addr,
+					 u16 len, const u8 *value)
+{
+	struct tpm_tis_synquacer_phy *phy = to_tpm_tis_tcg_phy(data);
+
+	while (len--)
+		iowrite8(*value++, phy->iobase + addr);
+
+	return 0;
+}
+
+static int tpm_tis_synquacer_read16_bw(struct tpm_tis_data *data,
+				       u32 addr, u16 *result)
+{
+	struct tpm_tis_synquacer_phy *phy = to_tpm_tis_tcg_phy(data);
+
+	/*
+	 * Due to the limitation of SPI controller on SynQuacer,
+	 * 16/32 bits access must be done in byte-wise and descending order.
+	 */
+	*result = (ioread8(phy->iobase + addr + 1) << 8) |
+		  (ioread8(phy->iobase + addr));
+
+	return 0;
+}
+
+static int tpm_tis_synquacer_read32_bw(struct tpm_tis_data *data,
+				       u32 addr, u32 *result)
+{
+	struct tpm_tis_synquacer_phy *phy = to_tpm_tis_tcg_phy(data);
+
+	/*
+	 * Due to the limitation of SPI controller on SynQuacer,
+	 * 16/32 bits access must be done in byte-wise and descending order.
+	 */
+	*result = (ioread8(phy->iobase + addr + 3) << 24) |
+		  (ioread8(phy->iobase + addr + 2) << 16) |
+		  (ioread8(phy->iobase + addr + 1) << 8) |
+		  (ioread8(phy->iobase + addr));
+
+	return 0;
+}
+
+static int tpm_tis_synquacer_write32_bw(struct tpm_tis_data *data,
+					u32 addr, u32 value)
+{
+	struct tpm_tis_synquacer_phy *phy = to_tpm_tis_tcg_phy(data);
+
+	/*
+	 * Due to the limitation of SPI controller on SynQuacer,
+	 * 16/32 bits access must be done in byte-wise and descending order.
+	 */
+	iowrite8(value >> 24, phy->iobase + addr + 3);
+	iowrite8(value >> 16, phy->iobase + addr + 2);
+	iowrite8(value >> 8, phy->iobase + addr + 1);
+	iowrite8(value, phy->iobase + addr);
+
+	return 0;
+}
+
+static const struct tpm_tis_phy_ops tpm_tcg_bw = {
+	.read_bytes	= tpm_tis_synquacer_read_bytes,
+	.write_bytes	= tpm_tis_synquacer_write_bytes,
+	.read16		= tpm_tis_synquacer_read16_bw,
+	.read32		= tpm_tis_synquacer_read32_bw,
+	.write32	= tpm_tis_synquacer_write32_bw,
+};
+
+static int tpm_tis_synquacer_init(struct device *dev,
+				  struct tpm_tis_synquacer_info *tpm_info)
+{
+	struct tpm_tis_synquacer_phy *phy;
+
+	phy = devm_kzalloc(dev, sizeof(struct tpm_tis_synquacer_phy), GFP_KERNEL);
+	if (phy == NULL)
+		return -ENOMEM;
+
+	phy->iobase = devm_ioremap_resource(dev, &tpm_info->res);
+	if (IS_ERR(phy->iobase))
+		return PTR_ERR(phy->iobase);
+
+	return tpm_tis_core_init(dev, &phy->priv, tpm_info->irq, &tpm_tcg_bw,
+				 ACPI_HANDLE(dev));
+}
+
+static SIMPLE_DEV_PM_OPS(tpm_tis_synquacer_pm, tpm_pm_suspend, tpm_tis_resume);
+
+static int tpm_tis_synquacer_probe(struct platform_device *pdev)
+{
+	struct tpm_tis_synquacer_info tpm_info = {};
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res == NULL) {
+		dev_err(&pdev->dev, "no memory resource defined\n");
+		return -ENODEV;
+	}
+	tpm_info.res = *res;
+
+	tpm_info.irq = -1;
+
+	return tpm_tis_synquacer_init(&pdev->dev, &tpm_info);
+}
+
+static int tpm_tis_synquacer_remove(struct platform_device *pdev)
+{
+	struct tpm_chip *chip = dev_get_drvdata(&pdev->dev);
+
+	tpm_chip_unregister(chip);
+	tpm_tis_remove(chip);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id tis_synquacer_of_platform_match[] = {
+	{.compatible = "socionext,synquacer-tpm-mmio"},
+	{},
+};
+MODULE_DEVICE_TABLE(of, tis_synquacer_of_platform_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id tpm_synquacer_acpi_tbl[] = {
+	{ "SCX0009" },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, tpm_synquacer_acpi_tbl);
+#endif
+
+static struct platform_driver tis_synquacer_drv = {
+	.probe = tpm_tis_synquacer_probe,
+	.remove = tpm_tis_synquacer_remove,
+	.driver = {
+		.name		= "tpm_tis_synquacer",
+		.pm		= &tpm_tis_synquacer_pm,
+		.of_match_table = of_match_ptr(tis_synquacer_of_platform_match),
+		.acpi_match_table = ACPI_PTR(tpm_synquacer_acpi_tbl),
+	},
+};
+
+static int __init tpm_tis_synquacer_module_init(void)
+{
+	int rc;
+
+	rc = platform_driver_register(&tis_synquacer_drv);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+static void __exit tpm_tis_synquacer_module_exit(void)
+{
+	platform_driver_unregister(&tis_synquacer_drv);
+}
+
+module_init(tpm_tis_synquacer_module_init);
+module_exit(tpm_tis_synquacer_module_exit);
+MODULE_DESCRIPTION("TPM MMIO Driver for Socionext SynQuacer platform");
+MODULE_LICENSE("GPL");

From a4c3049784551f6622be7f9a0a2dc993d4ba3149 Mon Sep 17 00:00:00 2001
From: Masahisa Kojima <masahisa.kojima@linaro.org>
Date: Tue, 28 Jul 2020 12:14:32 +0900
Subject: [PATCH 222/262] dt-bindings: Add SynQucer TPM MMIO as a trivial
 device

Add a compatible string for the SynQuacer TPM to the binding for a
TPM exposed via a memory mapped TIS frame. The MMIO window behaves
slightly differently on this hardware, so it requires its own
identifier.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Masahisa Kojima <masahisa.kojima@linaro.org>
Acked-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 Documentation/devicetree/bindings/trivial-devices.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 4ace8039840ad0..25c4239ebbfbcc 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -326,6 +326,8 @@ properties:
           - silabs,si7020
             # Skyworks SKY81452: Six-Channel White LED Driver with Touch Panel Bias Supply
           - skyworks,sky81452
+            # Socionext SynQuacer TPM MMIO module
+          - socionext,synquacer-tpm-mmio
             # i2c serial eeprom  (24cxx)
           - st,24c256
             # Ambient Light Sensor with SMBUS/Two Wire Serial Interface

From e08c6d3b19930f84a8da8c18710eefdfa48a9ceb Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 30 Jul 2020 19:16:13 +0300
Subject: [PATCH 223/262] tpm: use %*ph to print small buffer

Use %*ph format to print small buffer as hex string.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Petr Vorel <pvorel@suse.cz>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-sysfs.c | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c
index d52bf4df0bca96..e2ff0b273a0f1d 100644
--- a/drivers/char/tpm/tpm-sysfs.c
+++ b/drivers/char/tpm/tpm-sysfs.c
@@ -56,31 +56,20 @@ static ssize_t pubek_show(struct device *dev, struct device_attribute *attr,
 	out = (struct tpm_readpubek_out *)&tpm_buf.data[10];
 	str +=
 	    sprintf(str,
-		    "Algorithm: %02X %02X %02X %02X\n"
-		    "Encscheme: %02X %02X\n"
-		    "Sigscheme: %02X %02X\n"
-		    "Parameters: %02X %02X %02X %02X "
-		    "%02X %02X %02X %02X "
-		    "%02X %02X %02X %02X\n"
+		    "Algorithm: %4ph\n"
+		    "Encscheme: %2ph\n"
+		    "Sigscheme: %2ph\n"
+		    "Parameters: %12ph\n"
 		    "Modulus length: %d\n"
 		    "Modulus:\n",
-		    out->algorithm[0], out->algorithm[1], out->algorithm[2],
-		    out->algorithm[3],
-		    out->encscheme[0], out->encscheme[1],
-		    out->sigscheme[0], out->sigscheme[1],
-		    out->parameters[0], out->parameters[1],
-		    out->parameters[2], out->parameters[3],
-		    out->parameters[4], out->parameters[5],
-		    out->parameters[6], out->parameters[7],
-		    out->parameters[8], out->parameters[9],
-		    out->parameters[10], out->parameters[11],
+		    out->algorithm,
+		    out->encscheme,
+		    out->sigscheme,
+		    out->parameters,
 		    be32_to_cpu(out->keysize));
 
-	for (i = 0; i < 256; i++) {
-		str += sprintf(str, "%02X ", out->modulus[i]);
-		if ((i + 1) % 16 == 0)
-			str += sprintf(str, "\n");
-	}
+	for (i = 0; i < 256; i += 16)
+		str += sprintf(str, "%16ph\n", &out->modulus[i]);
 
 out_buf:
 	tpm_buf_destroy(&tpm_buf);

From 55707d531af62bd87c1b7e8d66fd9798094f9b3b Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Mon, 28 Sep 2020 11:00:12 -0700
Subject: [PATCH 224/262] tpm_tis: Add a check for invalid status

Some TIS based TPMs can return 0xff to status reads if the locality
hasn't been properly requested.  Detect this condition by checking the
bits that the TIS spec specifies must return zero are clear and return
zero in that case.  Also drop a warning so the problem can be
identified in the calling path and fixed (usually a missing
try_get_ops()).

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm_tis_core.c | 11 +++++++++++
 drivers/char/tpm/tpm_tis_core.h |  1 +
 2 files changed, 12 insertions(+)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 65ab1b027949c7..92c51c6cfd1b78 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -239,6 +239,17 @@ static u8 tpm_tis_status(struct tpm_chip *chip)
 	if (rc < 0)
 		return 0;
 
+	if (unlikely((status & TPM_STS_READ_ZERO) != 0)) {
+		/*
+		 * If this trips, the chances are the read is
+		 * returning 0xff because the locality hasn't been
+		 * acquired.  Usually because tpm_try_get_ops() hasn't
+		 * been called before doing a TPM operation.
+		 */
+		WARN_ONCE(1, "TPM returned invalid status\n");
+		return 0;
+	}
+
 	return status;
 }
 
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index 7337819f5d7b5d..9b2d32a59f6704 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -34,6 +34,7 @@ enum tis_status {
 	TPM_STS_GO = 0x20,
 	TPM_STS_DATA_AVAIL = 0x10,
 	TPM_STS_DATA_EXPECT = 0x08,
+	TPM_STS_READ_ZERO = 0x23, /* bits that must be zero on read */
 };
 
 enum tis_int_flags {

From d9ef632fab9ba81b708763bcbcfdbea9a55c95d2 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 1 Oct 2020 11:54:54 +0100
Subject: [PATCH 225/262] perf: arm-cmn: Fix unsigned comparison to less than
 zero

Ensure that the 'irq' field of 'struct arm_cmn_dtc' is a signed int
so that it can be compared '< 0'.

Link: https://lore.kernel.org/r/20200929170835.GA15956@embeddedor
Addresses-Coverity-ID: 1497488 ("Unsigned compared against 0")
Fixes: 0ba64770a2f2 ("perf: Add Arm CMN-600 PMU driver")
Reported-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index e824b5b83ea25f..cd4da4c5dac043 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -217,7 +217,7 @@ struct arm_cmn_node {
 
 struct arm_cmn_dtc {
 	void __iomem *base;
-	unsigned int irq;
+	int irq;
 	int irq_friend;
 	bool cc_active;
 

From 887e2cff0f8dc4dac4c35e631b711325c6dd65b8 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 1 Oct 2020 11:57:01 +0100
Subject: [PATCH 226/262] perf: arm-cmn: Fix conversion specifiers for node
 type

The node type field is an enum type, so print it as a 32-bit quantity
rather than as an unsigned short.

Link: https://lore.kernel.org/r/202009302350.QIzfkx62-lkp@intel.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index cd4da4c5dac043..a76ff594f3ca41 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -1324,7 +1324,7 @@ static void arm_cmn_init_node_info(struct arm_cmn *cmn, u32 offset, struct arm_c
 	else
 		level = 2;
 
-	dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6hx id:%-4hd off:%#x\n",
+	dev_dbg(cmn->dev, "node%*c%#06hx%*ctype:%-#6x id:%-4hd off:%#x\n",
 			(level * 2) + 1, ' ', node->id, 5 - (level * 2), ' ',
 			node->type, node->logid, offset);
 }
@@ -1430,7 +1430,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
 				break;
 			/* Something has gone horribly wrong */
 			default:
-				dev_err(cmn->dev, "invalid device node type: 0x%hx\n", dn->type);
+				dev_err(cmn->dev, "invalid device node type: 0x%x\n", dn->type);
 				return -ENODEV;
 			}
 		}

From 238c91115cd05c71447ea071624a4c9fe661f970 Mon Sep 17 00:00:00 2001
From: Mark Mossberg <mark.mossberg@gmail.com>
Date: Fri, 2 Oct 2020 04:29:16 +0000
Subject: [PATCH 227/262] x86/dumpstack: Fix misleading instruction pointer
 error message

Printing "Bad RIP value" if copy_code() fails can be misleading for
userspace pointers, since copy_code() can fail if the instruction
pointer is valid but the code is paged out. This is because copy_code()
calls copy_from_user_nmi() for userspace pointers, which disables page
fault handling.

This is reproducible in OOM situations, where it's plausible that the
code may be reclaimed in the time between entry into the kernel and when
this message is printed. This leaves a misleading log in dmesg that
suggests instruction pointer corruption has occurred, which may alarm
users.

Change the message to state the error condition more precisely.

 [ bp: Massage a bit. ]

Signed-off-by: Mark Mossberg <mark.mossberg@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201002042915.403558-1-mark.mossberg@gmail.com
---
 arch/x86/kernel/dumpstack.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 48ce44576947c8..ea8d51ec251bbf 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -115,7 +115,8 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
 	unsigned long prologue = regs->ip - PROLOGUE_SIZE;
 
 	if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
-		printk("%sCode: Bad RIP value.\n", loglvl);
+		printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n",
+		       loglvl, prologue);
 	} else {
 		printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
 		       __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes,

From 7b9be800756f60bf5bb7baf39c7d221ecb877863 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Date: Sat, 19 Sep 2020 02:41:19 +0300
Subject: [PATCH 228/262] MAINTAINERS: TPM DEVICE DRIVER: Update GIT

Update Git URL to

git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 190c7fa2ea010a..48aff80d1f296c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17590,7 +17590,7 @@ L:	linux-integrity@vger.kernel.org
 S:	Maintained
 W:	https://kernsec.org/wiki/index.php/Linux_Kernel_Integrity
 Q:	https://patchwork.kernel.org/project/linux-integrity/list/
-T:	git git://git.infradead.org/users/jjs/linux-tpmdd.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git
 F:	drivers/char/tpm/
 
 TRACING

From e9b60476bea058d85f8029e6701d9476f7fdb92f Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Fri, 2 Oct 2020 17:26:25 +0530
Subject: [PATCH 229/262] kselftest/arm64: Add utilities and a test to validate
 mte memory

This test checks that the memory tag is present after mte allocation and
the memory is accessible with those tags. This testcase verifies all
sync, async and none mte error reporting mode. The allocated mte buffers
are verified for Allocated range (no error expected while accessing
buffer), Underflow range, and Overflow range.

Different test scenarios covered here are,
* Verify that mte memory are accessible at byte/block level.
* Force underflow and overflow to occur and check the data consistency.
* Check to/from between tagged and untagged memory.
* Check that initial allocated memory to have 0 tag.

This change also creates the necessary infrastructure to add mte test
cases. MTE kselftests can use the several utility functions provided here
to add wide variety of mte test scenarios.

GCC compiler need flag '-march=armv8.5-a+memtag' so those flags are
verified before compilation.

The mte testcases can be launched with kselftest framework as,

make TARGETS=arm64 ARM64_SUBTARGETS=mte kselftest

or compiled as,

make -C tools/testing/selftests TARGETS=arm64 ARM64_SUBTARGETS=mte CC='compiler'

Co-developed-by: Gabor Kertesz <gabor.kertesz@arm.com>
Signed-off-by: Gabor Kertesz <gabor.kertesz@arm.com>
Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201002115630.24683-2-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/Makefile        |   2 +-
 tools/testing/selftests/arm64/mte/.gitignore  |   1 +
 tools/testing/selftests/arm64/mte/Makefile    |  29 ++
 .../selftests/arm64/mte/check_buffer_fill.c   | 475 ++++++++++++++++++
 .../selftests/arm64/mte/mte_common_util.c     | 341 +++++++++++++
 .../selftests/arm64/mte/mte_common_util.h     | 117 +++++
 tools/testing/selftests/arm64/mte/mte_def.h   |  60 +++
 .../testing/selftests/arm64/mte/mte_helper.S  | 114 +++++
 8 files changed, 1138 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/arm64/mte/.gitignore
 create mode 100644 tools/testing/selftests/arm64/mte/Makefile
 create mode 100644 tools/testing/selftests/arm64/mte/check_buffer_fill.c
 create mode 100644 tools/testing/selftests/arm64/mte/mte_common_util.c
 create mode 100644 tools/testing/selftests/arm64/mte/mte_common_util.h
 create mode 100644 tools/testing/selftests/arm64/mte/mte_def.h
 create mode 100644 tools/testing/selftests/arm64/mte/mte_helper.S

diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 93b567d23c8b2e..3723d9dea11a44 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
 ARCH ?= $(shell uname -m 2>/dev/null || echo not)
 
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal
+ARM64_SUBTARGETS ?= tags signal mte
 else
 ARM64_SUBTARGETS :=
 endif
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
new file mode 100644
index 00000000000000..3f8c1f6c82b9ec
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -0,0 +1 @@
+check_buffer_fill
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
new file mode 100644
index 00000000000000..2480226dfe57cd
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+CFLAGS += -std=gnu99 -I.
+SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
+PROGS := $(patsubst %.c,%,$(SRCS))
+
+#Add mte compiler option
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),)
+CFLAGS += -march=armv8.5-a+memtag
+endif
+
+#check if the compiler works well
+mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(mte_cc_support),1)
+# Generated binaries to be installed by top KSFT script
+TEST_GEN_PROGS := $(PROGS)
+
+# Get Kernel headers installed and use them.
+KSFT_KHDR_INSTALL := 1
+endif
+
+# Include KSFT lib.mk.
+include ../../lib.mk
+
+ifeq ($(mte_cc_support),1)
+$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S
+endif
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
new file mode 100644
index 00000000000000..242635d79035a7
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define OVERFLOW_RANGE MT_GRANULE_SIZE
+
+static int sizes[] = {
+	1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+enum mte_block_test_alloc {
+	UNTAGGED_TAGGED,
+	TAGGED_UNTAGGED,
+	TAGGED_TAGGED,
+	BLOCK_ALLOC_MAX,
+};
+
+static int check_buffer_by_byte(int mem_type, int mode)
+{
+	char *ptr;
+	int i, j, item;
+	bool err;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+
+	for (i = 0; i < item; i++) {
+		ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true);
+		if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS)
+			return KSFT_FAIL;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]);
+		/* Set some value in tagged memory */
+		for (j = 0; j < sizes[i]; j++)
+			ptr[j] = '1';
+		mte_wait_after_trig();
+		err = cur_mte_cxt.fault_valid;
+		/* Check the buffer whether it is filled. */
+		for (j = 0; j < sizes[i] && !err; j++) {
+			if (ptr[j] != '1')
+				err = true;
+		}
+		mte_free_memory((void *)ptr, sizes[i], mem_type, true);
+
+		if (err)
+			break;
+	}
+	if (!err)
+		return KSFT_PASS;
+	else
+		return KSFT_FAIL;
+}
+
+static int check_buffer_underflow_by_byte(int mem_type, int mode,
+					  int underflow_range)
+{
+	char *ptr;
+	int i, j, item, last_index;
+	bool err;
+	char *und_ptr = NULL;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+	for (i = 0; i < item; i++) {
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+							    underflow_range, 0);
+		if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+					       underflow_range, 0) != KSFT_PASS)
+			return KSFT_FAIL;
+
+		mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range);
+		last_index = 0;
+		/* Set some value in tagged memory and make the buffer underflow */
+		for (j = sizes[i] - 1; (j >= -underflow_range) &&
+				       (cur_mte_cxt.fault_valid == false); j--) {
+			ptr[j] = '1';
+			last_index = j;
+		}
+		mte_wait_after_trig();
+		err = false;
+		/* Check whether the buffer is filled */
+		for (j = 0; j < sizes[i]; j++) {
+			if (ptr[j] != '1') {
+				err = true;
+				ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+						j, ptr);
+				break;
+			}
+		}
+		if (err)
+			goto check_buffer_underflow_by_byte_err;
+
+		switch (mode) {
+		case MTE_NONE_ERR:
+			if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) {
+				err = true;
+				break;
+			}
+			/* There were no fault so the underflow area should be filled */
+			und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range);
+			for (j = 0 ; j < underflow_range; j++) {
+				if (und_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_ASYNC_ERR:
+			/* Imprecise fault should occur otherwise return error */
+			if (cur_mte_cxt.fault_valid == false) {
+				err = true;
+				break;
+			}
+			/*
+			 * The imprecise fault is checked after the write to the buffer,
+			 * so the underflow area before the fault should be filled.
+			 */
+			und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+			for (j = last_index ; j < 0 ; j++) {
+				if (und_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_SYNC_ERR:
+			/* Precise fault should occur otherwise return error */
+			if (!cur_mte_cxt.fault_valid || (last_index != (-1))) {
+				err = true;
+				break;
+			}
+			/* Underflow area should not be filled */
+			und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+			if (und_ptr[-1] == '1')
+				err = true;
+			break;
+		default:
+			err = true;
+		break;
+		}
+check_buffer_underflow_by_byte_err:
+		mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0);
+		if (err)
+			break;
+	}
+	return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_overflow_by_byte(int mem_type, int mode,
+					  int overflow_range)
+{
+	char *ptr;
+	int i, j, item, last_index;
+	bool err;
+	size_t tagged_size, overflow_size;
+	char *over_ptr = NULL;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+	for (i = 0; i < item; i++) {
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+							    0, overflow_range);
+		if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+						 0, overflow_range) != KSFT_PASS)
+			return KSFT_FAIL;
+
+		tagged_size = MT_ALIGN_UP(sizes[i]);
+
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range);
+
+		/* Set some value in tagged memory and make the buffer underflow */
+		for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) &&
+					     (cur_mte_cxt.fault_valid == false); j++) {
+			ptr[j] = '1';
+			last_index = j;
+		}
+		mte_wait_after_trig();
+		err = false;
+		/* Check whether the buffer is filled */
+		for (j = 0; j < sizes[i]; j++) {
+			if (ptr[j] != '1') {
+				err = true;
+				ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+						j, ptr);
+				break;
+			}
+		}
+		if (err)
+			goto check_buffer_overflow_by_byte_err;
+
+		overflow_size = overflow_range - (tagged_size - sizes[i]);
+
+		switch (mode) {
+		case MTE_NONE_ERR:
+			if ((cur_mte_cxt.fault_valid == true) ||
+			    (last_index != (sizes[i] + overflow_range - 1))) {
+				err = true;
+				break;
+			}
+			/* There were no fault so the overflow area should be filled */
+			over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+			for (j = 0 ; j < overflow_size; j++) {
+				if (over_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_ASYNC_ERR:
+			/* Imprecise fault should occur otherwise return error */
+			if (cur_mte_cxt.fault_valid == false) {
+				err = true;
+				break;
+			}
+			/*
+			 * The imprecise fault is checked after the write to the buffer,
+			 * so the overflow area should be filled before the fault.
+			 */
+			over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+			for (j = tagged_size ; j < last_index; j++) {
+				if (over_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_SYNC_ERR:
+			/* Precise fault should occur otherwise return error */
+			if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) {
+				err = true;
+				break;
+			}
+			/* Underflow area should not be filled */
+			over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+			for (j = 0 ; j < overflow_size; j++) {
+				if (over_ptr[j] == '1')
+					err = true;
+			}
+			break;
+		default:
+			err = true;
+		break;
+		}
+check_buffer_overflow_by_byte_err:
+		mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range);
+		if (err)
+			break;
+	}
+	return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size)
+{
+	char *src, *dst;
+	int j, result = KSFT_PASS;
+	enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED;
+
+	for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) {
+		switch (alloc_type) {
+		case UNTAGGED_TAGGED:
+			src = (char *)mte_allocate_memory(size, mem_type, 0, false);
+			if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS)
+				return KSFT_FAIL;
+
+			dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+				mte_free_memory((void *)src, size, mem_type, false);
+				return KSFT_FAIL;
+			}
+
+			break;
+		case TAGGED_UNTAGGED:
+			dst = (char *)mte_allocate_memory(size, mem_type, 0, false);
+			if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS)
+				return KSFT_FAIL;
+
+			src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) {
+				mte_free_memory((void *)dst, size, mem_type, false);
+				return KSFT_FAIL;
+			}
+			break;
+		case TAGGED_TAGGED:
+			src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS)
+				return KSFT_FAIL;
+
+			dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+				mte_free_memory((void *)src, size, mem_type, true);
+				return KSFT_FAIL;
+			}
+			break;
+		default:
+			return KSFT_FAIL;
+		}
+
+		cur_mte_cxt.fault_valid = false;
+		result = KSFT_PASS;
+		mte_initialize_current_context(mode, (uintptr_t)dst, size);
+		/* Set some value in memory and copy*/
+		memset((void *)src, (int)'1', size);
+		memcpy((void *)dst, (void *)src, size);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid) {
+			result = KSFT_FAIL;
+			goto check_buffer_by_block_err;
+		}
+		/* Check the buffer whether it is filled. */
+		for (j = 0; j < size; j++) {
+			if (src[j] != dst[j] || src[j] != '1') {
+				result = KSFT_FAIL;
+				break;
+			}
+		}
+check_buffer_by_block_err:
+		mte_free_memory((void *)src, size, mem_type,
+				MT_FETCH_TAG((uintptr_t)src) ? true : false);
+		mte_free_memory((void *)dst, size, mem_type,
+				MT_FETCH_TAG((uintptr_t)dst) ? true : false);
+		if (result != KSFT_PASS)
+			return result;
+	}
+	return result;
+}
+
+static int check_buffer_by_block(int mem_type, int mode)
+{
+	int i, item, result = KSFT_PASS;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+	cur_mte_cxt.fault_valid = false;
+	for (i = 0; i < item; i++) {
+		result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]);
+		if (result != KSFT_PASS)
+			break;
+	}
+	return result;
+}
+
+static int compare_memory_tags(char *ptr, size_t size, int tag)
+{
+	int i, new_tag;
+
+	for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+		new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+		if (tag != new_tag) {
+			ksft_print_msg("FAIL: child mte tag mismatch\n");
+			return KSFT_FAIL;
+		}
+	}
+	return KSFT_PASS;
+}
+
+static int check_memory_initial_tags(int mem_type, int mode, int mapping)
+{
+	char *ptr;
+	int run, fd;
+	int total = sizeof(sizes)/sizeof(int);
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		/* check initial tags for anonymous mmap */
+		ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
+		if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS)
+			return KSFT_FAIL;
+		if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+			mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+			return KSFT_FAIL;
+		}
+		mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+
+		/* check initial tags for file mmap */
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+		ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd);
+		if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+			mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+		close(fd);
+	}
+	return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+	size_t page_size = getpagesize();
+	int item = sizeof(sizes)/sizeof(int);
+
+	sizes[item - 3] = page_size - 1;
+	sizes[item - 2] = page_size;
+	sizes[item - 1] = page_size + 1;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+
+	/* Register SIGSEGV handler */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	/* Buffer by byte tests */
+	evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR),
+	"Check buffer correctness by byte with sync err mode and mmap memory\n");
+	evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR),
+	"Check buffer correctness by byte with async err mode and mmap memory\n");
+	evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR),
+	"Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n");
+	evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR),
+	"Check buffer correctness by byte with async err mode and mmap/mprotect memory\n");
+
+	/* Check buffer underflow with underflow size as 16 */
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write underflow by byte with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write underflow by byte with async mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+	"Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+	/* Check buffer underflow with underflow size as page size */
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size),
+	"Check buffer write underflow by byte with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size),
+	"Check buffer write underflow by byte with async mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size),
+	"Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+	/* Check buffer overflow with overflow size as 16 */
+	evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write overflow by byte with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write overflow by byte with async mode and mmap memory\n");
+	evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+	"Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n");
+
+	/* Buffer by block tests */
+	evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR),
+	"Check buffer write correctness by block with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR),
+	"Check buffer write correctness by block with async mode and mmap memory\n");
+	evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR),
+	"Check buffer write correctness by block with tag fault ignore and mmap memory\n");
+
+	/* Initial tags are supposed to be 0 */
+	evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check initial tags with private mapping, sync error mode and mmap memory\n");
+	evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n");
+	evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+	"Check initial tags with shared mapping, sync error mode and mmap memory\n");
+	evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+	"Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
new file mode 100644
index 00000000000000..39f8908988eab0
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/auxvec.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define INIT_BUFFER_SIZE       256
+
+struct mte_fault_cxt cur_mte_cxt;
+static unsigned int mte_cur_mode;
+static unsigned int mte_cur_pstate_tco;
+
+void mte_default_handler(int signum, siginfo_t *si, void *uc)
+{
+	unsigned long addr = (unsigned long)si->si_addr;
+
+	if (signum == SIGSEGV) {
+#ifdef DEBUG
+		ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+				((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+#endif
+		if (si->si_code == SEGV_MTEAERR) {
+			if (cur_mte_cxt.trig_si_code == si->si_code)
+				cur_mte_cxt.fault_valid = true;
+			return;
+		}
+		/* Compare the context for precise error */
+		else if (si->si_code == SEGV_MTESERR) {
+			if (cur_mte_cxt.trig_si_code == si->si_code &&
+			    ((cur_mte_cxt.trig_range >= 0 &&
+			      addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+			      addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+			     (cur_mte_cxt.trig_range < 0 &&
+			      addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+			      addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+				cur_mte_cxt.fault_valid = true;
+				/* Adjust the pc by 4 */
+				((ucontext_t *)uc)->uc_mcontext.pc += 4;
+			} else {
+				ksft_print_msg("Invalid MTE synchronous exception caught!\n");
+				exit(1);
+			}
+		} else {
+			ksft_print_msg("Unknown SIGSEGV exception caught!\n");
+			exit(1);
+		}
+	} else if (signum == SIGBUS) {
+		ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+				((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+		if ((cur_mte_cxt.trig_range >= 0 &&
+		     addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+		     addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+		    (cur_mte_cxt.trig_range < 0 &&
+		     addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+		     addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+			cur_mte_cxt.fault_valid = true;
+			/* Adjust the pc by 4 */
+			((ucontext_t *)uc)->uc_mcontext.pc += 4;
+		}
+	}
+}
+
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+{
+	struct sigaction sa;
+
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO;
+	sigemptyset(&sa.sa_mask);
+	sigaction(signal, &sa, NULL);
+}
+
+void mte_wait_after_trig(void)
+{
+	sched_yield();
+}
+
+void *mte_insert_tags(void *ptr, size_t size)
+{
+	void *tag_ptr;
+	int align_size;
+
+	if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+		ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+		return NULL;
+	}
+	align_size = MT_ALIGN_UP(size);
+	tag_ptr = mte_insert_random_tag(ptr);
+	mte_set_tag_address_range(tag_ptr, align_size);
+	return tag_ptr;
+}
+
+void mte_clear_tags(void *ptr, size_t size)
+{
+	if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+		ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+		return;
+	}
+	size = MT_ALIGN_UP(size);
+	ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr);
+	mte_clear_tag_address_range(ptr, size);
+}
+
+static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
+					 size_t range_before, size_t range_after,
+					 bool tags, int fd)
+{
+	void *ptr;
+	int prot_flag, map_flag;
+	size_t entire_size = size + range_before + range_after;
+
+	if (mem_type != USE_MALLOC && mem_type != USE_MMAP &&
+	    mem_type != USE_MPROTECT) {
+		ksft_print_msg("FAIL: Invalid allocate request\n");
+		return NULL;
+	}
+	if (mem_type == USE_MALLOC)
+		return malloc(entire_size) + range_before;
+
+	prot_flag = PROT_READ | PROT_WRITE;
+	if (mem_type == USE_MMAP)
+		prot_flag |= PROT_MTE;
+
+	map_flag = mapping;
+	if (fd == -1)
+		map_flag = MAP_ANONYMOUS | map_flag;
+	if (!(mapping & MAP_SHARED))
+		map_flag |= MAP_PRIVATE;
+	ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0);
+	if (ptr == MAP_FAILED) {
+		ksft_print_msg("FAIL: mmap allocation\n");
+		return NULL;
+	}
+	if (mem_type == USE_MPROTECT) {
+		if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) {
+			munmap(ptr, size);
+			ksft_print_msg("FAIL: mprotect PROT_MTE property\n");
+			return NULL;
+		}
+	}
+	if (tags)
+		ptr = mte_insert_tags(ptr + range_before, size);
+	return ptr;
+}
+
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+				    size_t range_before, size_t range_after)
+{
+	return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+					   range_after, true, -1);
+}
+
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags)
+{
+	return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1);
+}
+
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd)
+{
+	int index;
+	char buffer[INIT_BUFFER_SIZE];
+
+	if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+		ksft_print_msg("FAIL: Invalid mmap file request\n");
+		return NULL;
+	}
+	/* Initialize the file for mappable size */
+	lseek(fd, 0, SEEK_SET);
+	for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE)
+		write(fd, buffer, INIT_BUFFER_SIZE);
+	index -= INIT_BUFFER_SIZE;
+	write(fd, buffer, size - index);
+	return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
+}
+
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+					 size_t range_before, size_t range_after, int fd)
+{
+	int index;
+	char buffer[INIT_BUFFER_SIZE];
+	int map_size = size + range_before + range_after;
+
+	if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+		ksft_print_msg("FAIL: Invalid mmap file request\n");
+		return NULL;
+	}
+	/* Initialize the file for mappable size */
+	lseek(fd, 0, SEEK_SET);
+	for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
+		write(fd, buffer, INIT_BUFFER_SIZE);
+	index -= INIT_BUFFER_SIZE;
+	write(fd, buffer, map_size - index);
+	return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+					   range_after, true, fd);
+}
+
+static void __mte_free_memory_range(void *ptr, size_t size, int mem_type,
+				    size_t range_before, size_t range_after, bool tags)
+{
+	switch (mem_type) {
+	case USE_MALLOC:
+		free(ptr - range_before);
+		break;
+	case USE_MMAP:
+	case USE_MPROTECT:
+		if (tags)
+			mte_clear_tags(ptr, size);
+		munmap(ptr - range_before, size + range_before + range_after);
+		break;
+	default:
+		ksft_print_msg("FAIL: Invalid free request\n");
+		break;
+	}
+}
+
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+			       size_t range_before, size_t range_after)
+{
+	__mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true);
+}
+
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags)
+{
+	__mte_free_memory_range(ptr, size, mem_type, 0, 0, tags);
+}
+
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
+{
+	cur_mte_cxt.fault_valid = false;
+	cur_mte_cxt.trig_addr = ptr;
+	cur_mte_cxt.trig_range = range;
+	if (mode == MTE_SYNC_ERR)
+		cur_mte_cxt.trig_si_code = SEGV_MTESERR;
+	else if (mode == MTE_ASYNC_ERR)
+		cur_mte_cxt.trig_si_code = SEGV_MTEAERR;
+	else
+		cur_mte_cxt.trig_si_code = 0;
+}
+
+int mte_switch_mode(int mte_option, unsigned long incl_mask)
+{
+	unsigned long en = 0;
+
+	if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR ||
+	      mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
+		ksft_print_msg("FAIL: Invalid mte config option\n");
+		return -EINVAL;
+	}
+	en = PR_TAGGED_ADDR_ENABLE;
+	if (mte_option == MTE_SYNC_ERR)
+		en |= PR_MTE_TCF_SYNC;
+	else if (mte_option == MTE_ASYNC_ERR)
+		en |= PR_MTE_TCF_ASYNC;
+	else if (mte_option == MTE_NONE_ERR)
+		en |= PR_MTE_TCF_NONE;
+
+	en |= (incl_mask << PR_MTE_TAG_SHIFT);
+	/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
+	if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) {
+		ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+#define ID_AA64PFR1_MTE_SHIFT		8
+#define ID_AA64PFR1_MTE			2
+
+int mte_default_setup(void)
+{
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+	unsigned long en = 0;
+	int ret;
+
+	if (!(hwcaps & HWCAP_CPUID)) {
+		ksft_print_msg("FAIL: CPUID registers unavailable\n");
+		return KSFT_FAIL;
+	}
+	/* Read ID_AA64PFR1_EL1 register */
+	asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory");
+	if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) {
+		ksft_print_msg("FAIL: MTE features unavailable\n");
+		return KSFT_SKIP;
+	}
+	/* Get current mte mode */
+	ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
+	if (ret < 0) {
+		ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret);
+		return KSFT_FAIL;
+	}
+	if (ret & PR_MTE_TCF_SYNC)
+		mte_cur_mode = MTE_SYNC_ERR;
+	else if (ret & PR_MTE_TCF_ASYNC)
+		mte_cur_mode = MTE_ASYNC_ERR;
+	else if (ret & PR_MTE_TCF_NONE)
+		mte_cur_mode = MTE_NONE_ERR;
+
+	mte_cur_pstate_tco = mte_get_pstate_tco();
+	/* Disable PSTATE.TCO */
+	mte_disable_pstate_tco();
+	return 0;
+}
+
+void mte_restore_setup(void)
+{
+	mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+	if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
+		mte_enable_pstate_tco();
+	else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
+		mte_disable_pstate_tco();
+}
+
+int create_temp_file(void)
+{
+	int fd;
+	char filename[] = "/dev/shm/tmp_XXXXXX";
+
+	/* Create a file in the tmpfs filesystem */
+	fd = mkstemp(&filename[0]);
+	if (fd == -1) {
+		ksft_print_msg("FAIL: Unable to open temporary file\n");
+		return 0;
+	}
+	unlink(&filename[0]);
+	return fd;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
new file mode 100644
index 00000000000000..45160e061a0e58
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _MTE_COMMON_UTIL_H
+#define _MTE_COMMON_UTIL_H
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include "mte_def.h"
+#include "kselftest.h"
+
+enum mte_mem_type {
+	USE_MALLOC,
+	USE_MMAP,
+	USE_MPROTECT,
+};
+
+enum mte_mode {
+	MTE_NONE_ERR,
+	MTE_SYNC_ERR,
+	MTE_ASYNC_ERR,
+};
+
+struct mte_fault_cxt {
+	/* Address start which triggers mte tag fault */
+	unsigned long trig_addr;
+	/* Address range for mte tag fault and negative value means underflow */
+	ssize_t trig_range;
+	/* siginfo si code */
+	unsigned long trig_si_code;
+	/* Flag to denote if correct fault caught */
+	bool fault_valid;
+};
+
+extern struct mte_fault_cxt cur_mte_cxt;
+
+/* MTE utility functions */
+void mte_default_handler(int signum, siginfo_t *si, void *uc);
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_wait_after_trig(void);
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+				    size_t range_before, size_t range_after);
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping,
+			       bool tags, int fd);
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+					 size_t range_before, size_t range_after, int fd);
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags);
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+			       size_t range_before, size_t range_after);
+void *mte_insert_tags(void *ptr, size_t size);
+void mte_clear_tags(void *ptr, size_t size);
+int mte_default_setup(void);
+void mte_restore_setup(void);
+int mte_switch_mode(int mte_option, unsigned long incl_mask);
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
+
+/* Common utility functions */
+int create_temp_file(void);
+
+/* Assembly MTE utility functions */
+void *mte_insert_random_tag(void *ptr);
+void *mte_get_tag_address(void *ptr);
+void mte_set_tag_address_range(void *ptr, int range);
+void mte_clear_tag_address_range(void *ptr, int range);
+void mte_disable_pstate_tco(void);
+void mte_enable_pstate_tco(void);
+unsigned int mte_get_pstate_tco(void);
+
+/* Test framework static inline functions/macros */
+static inline void evaluate_test(int err, const char *msg)
+{
+	if (err == KSFT_PASS)
+		ksft_test_result_pass(msg);
+	else if (err == KSFT_FAIL)
+		ksft_test_result_fail(msg);
+}
+
+static inline int check_allocated_memory(void *ptr, size_t size,
+					 int mem_type, bool tags)
+{
+	if (ptr == NULL) {
+		ksft_print_msg("FAIL: memory allocation\n");
+		return KSFT_FAIL;
+	}
+
+	if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) {
+		ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+		mte_free_memory((void *)ptr, size, mem_type, false);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type,
+					       size_t range_before, size_t range_after)
+{
+	if (ptr == NULL) {
+		ksft_print_msg("FAIL: memory allocation\n");
+		return KSFT_FAIL;
+	}
+
+	if (!MT_FETCH_TAG((uintptr_t)ptr)) {
+		ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+		mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before,
+					  range_after);
+		return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+#endif /* _MTE_COMMON_UTIL_H */
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
new file mode 100644
index 00000000000000..9b188254b61a34
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+/*
+ * Below definitions may be found in kernel headers, However, they are
+ * redefined here to decouple the MTE selftests compilations from them.
+ */
+#ifndef SEGV_MTEAERR
+#define	SEGV_MTEAERR	8
+#endif
+#ifndef SEGV_MTESERR
+#define	SEGV_MTESERR	9
+#endif
+#ifndef PROT_MTE
+#define PROT_MTE	 0x20
+#endif
+#ifndef HWCAP2_MTE
+#define HWCAP2_MTE	(1 << 18)
+#endif
+
+#ifndef PR_MTE_TCF_SHIFT
+#define PR_MTE_TCF_SHIFT	1
+#endif
+#ifndef PR_MTE_TCF_NONE
+#define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_SYNC
+#define	PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_ASYNC
+#define PR_MTE_TCF_ASYNC	(2UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TAG_SHIFT
+#define	PR_MTE_TAG_SHIFT	3
+#endif
+
+/* MTE Hardware feature definitions below. */
+#define MT_TAG_SHIFT		56
+#define MT_TAG_MASK		0xFUL
+#define MT_FREE_TAG		0x0UL
+#define MT_GRANULE_SIZE         16
+#define MT_TAG_COUNT		16
+#define MT_INCLUDE_TAG_MASK	0xFFFF
+#define MT_EXCLUDE_TAG_MASK	0x0
+
+#define MT_ALIGN_GRANULE	(MT_GRANULE_SIZE - 1)
+#define MT_CLEAR_TAG(x)		((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
+#define MT_SET_TAG(x, y)	((x) | (y << MT_TAG_SHIFT))
+#define MT_FETCH_TAG(x)		((x >> MT_TAG_SHIFT) & (MT_TAG_MASK))
+#define MT_ALIGN_UP(x)		((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
+
+#define MT_PSTATE_TCO_SHIFT	25
+#define MT_PSTATE_TCO_MASK	~(0x1 << MT_PSTATE_TCO_SHIFT)
+#define MT_PSTATE_TCO_EN	1
+#define MT_PSTATE_TCO_DIS	0
+
+#define MT_EXCLUDE_TAG(x)		(1 << (x))
+#define MT_INCLUDE_VALID_TAG(x)		(MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x))
+#define MT_INCLUDE_VALID_TAGS(x)	(MT_INCLUDE_TAG_MASK ^ (x))
+#define MTE_ALLOW_NON_ZERO_TAG		MT_INCLUDE_VALID_TAG(0)
diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S
new file mode 100644
index 00000000000000..48e049fbad9a91
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_helper.S
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#include "mte_def.h"
+
+#define ENTRY(name) \
+	.globl name ;\
+	.p2align 2;\
+	.type name, @function ;\
+name:
+
+#define ENDPROC(name) \
+	.size name, .-name ;
+
+	.text
+/*
+ * mte_insert_random_tag: Insert random tag and might be same as the source tag if
+ *			  the source pointer has it.
+ * Input:
+ *		x0 - source pointer with a tag/no-tag
+ * Return:
+ *		x0 - pointer with random tag
+ */
+ENTRY(mte_insert_random_tag)
+	irg	x0, x0, xzr
+	ret
+ENDPROC(mte_insert_random_tag)
+
+/*
+ * mte_get_tag_address: Get the tag from given address.
+ * Input:
+ *		x0 - source pointer
+ * Return:
+ *		x0 - pointer with appended tag
+ */
+ENTRY(mte_get_tag_address)
+	ldg	x0, [x0]
+	ret
+ENDPROC(mte_get_tag_address)
+
+/*
+ * mte_set_tag_address_range: Set the tag range from the given address
+ * Input:
+ *		x0 - source pointer with tag data
+ *		x1 - range
+ * Return:
+ *		none
+ */
+ENTRY(mte_set_tag_address_range)
+	cbz	x1, 2f
+1:
+	stg	x0, [x0, #0x0]
+	add	x0, x0, #MT_GRANULE_SIZE
+	sub	x1, x1, #MT_GRANULE_SIZE
+	cbnz	x1, 1b
+2:
+	ret
+ENDPROC(mte_set_tag_address_range)
+
+/*
+ * mt_clear_tag_address_range: Clear the tag range from the given address
+ * Input:
+ *		x0 - source pointer with tag data
+ *		x1 - range
+ * Return:
+ *		none
+ */
+ENTRY(mte_clear_tag_address_range)
+	cbz	x1, 2f
+1:
+	stzg	x0, [x0, #0x0]
+	add	x0, x0, #MT_GRANULE_SIZE
+	sub	x1, x1, #MT_GRANULE_SIZE
+	cbnz	x1, 1b
+2:
+	ret
+ENDPROC(mte_clear_tag_address_range)
+
+/*
+ * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field
+ * Input:
+ *		none
+ * Return:
+ *		none
+ */
+ENTRY(mte_enable_pstate_tco)
+	msr	tco, #MT_PSTATE_TCO_EN
+	ret
+ENDPROC(mte_enable_pstate_tco)
+
+/*
+ * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field
+ * Input:
+ *		none
+ * Return:
+ *		none
+ */
+ENTRY(mte_disable_pstate_tco)
+	msr	tco, #MT_PSTATE_TCO_DIS
+	ret
+ENDPROC(mte_disable_pstate_tco)
+
+/*
+ * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field
+ * Input:
+ *		none
+ * Return:
+ *		x0
+ */
+ENTRY(mte_get_pstate_tco)
+	mrs	x0, tco
+	ubfx	x0, x0, #MT_PSTATE_TCO_SHIFT, #1
+	ret
+ENDPROC(mte_get_pstate_tco)

From f3b2a26ca78da2ef36cf76d5511e3c94baee96a1 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Fri, 2 Oct 2020 17:26:26 +0530
Subject: [PATCH 230/262] kselftest/arm64: Verify mte tag inclusion via prctl

This testcase verifies that the tag generated with "irg" instruction
contains only included tags. This is done via prtcl call.

This test covers 4 scenarios,
* At least one included tag.
* More than one included tags.
* All included.
* None included.

Co-developed-by: Gabor Kertesz <gabor.kertesz@arm.com>
Signed-off-by: Gabor Kertesz <gabor.kertesz@arm.com>
Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201002115630.24683-3-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/.gitignore  |   1 +
 .../arm64/mte/check_tags_inclusion.c          | 185 ++++++++++++++++++
 2 files changed, 186 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/mte/check_tags_inclusion.c

diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index 3f8c1f6c82b9ec..c3fca255d3d670 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -1 +1,2 @@
 check_buffer_fill
+check_tags_inclusion
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
new file mode 100644
index 00000000000000..94d245a0ed560f
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE		(5 * MT_GRANULE_SIZE)
+#define RUNS			(MT_TAG_COUNT * 2)
+#define MTE_LAST_TAG_MASK	(0x7FFF)
+
+static int verify_mte_pointer_validity(char *ptr, int mode)
+{
+	mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+	/* Check the validity of the tagged pointer */
+	memset((void *)ptr, '1', BUFFER_SIZE);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid)
+		return KSFT_FAIL;
+	/* Proceed further for nonzero tags */
+	if (!MT_FETCH_TAG((uintptr_t)ptr))
+		return KSFT_PASS;
+	mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1);
+	/* Check the validity outside the range */
+	ptr[BUFFER_SIZE] = '2';
+	mte_wait_after_trig();
+	if (!cur_mte_cxt.fault_valid)
+		return KSFT_FAIL;
+	else
+		return KSFT_PASS;
+}
+
+static int check_single_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int tag, run, result = KSFT_PASS;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+				   mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
+		mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+		/* Try to catch a excluded tag by a number of tries. */
+		for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+			ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+			/* Check tag value */
+			if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
+				ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+					       MT_FETCH_TAG((uintptr_t)ptr),
+					       MT_INCLUDE_VALID_TAG(tag));
+				result = KSFT_FAIL;
+				break;
+			}
+			result = verify_mte_pointer_validity(ptr, mode);
+		}
+	}
+	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	return result;
+}
+
+static int check_multiple_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int tag, run, result = KSFT_PASS;
+	unsigned long excl_mask = 0;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+				   mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
+		excl_mask |= 1 << tag;
+		mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+		/* Try to catch a excluded tag by a number of tries. */
+		for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+			ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+			/* Check tag value */
+			if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
+				ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+					       MT_FETCH_TAG((uintptr_t)ptr),
+					       MT_INCLUDE_VALID_TAGS(excl_mask));
+				result = KSFT_FAIL;
+				break;
+			}
+			result = verify_mte_pointer_validity(ptr, mode);
+		}
+	}
+	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	return result;
+}
+
+static int check_all_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int run, result = KSFT_PASS;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+				   mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+	/* Try to catch a excluded tag by a number of tries. */
+	for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+		ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+		/*
+		 * Here tag byte can be between 0x0 to 0xF (full allowed range)
+		 * so no need to match so just verify if it is writable.
+		 */
+		result = verify_mte_pointer_validity(ptr, mode);
+	}
+	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	return result;
+}
+
+static int check_none_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int run;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+	/* Try to catch a excluded tag by a number of tries. */
+	for (run = 0; run < RUNS; run++) {
+		ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+		/* Here all tags exluded so tag value generated should be 0 */
+		if (MT_FETCH_TAG((uintptr_t)ptr)) {
+			ksft_print_msg("FAIL: included tag value found\n");
+			mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true);
+			return KSFT_FAIL;
+		}
+		mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+		/* Check the write validity of the untagged pointer */
+		memset((void *)ptr, '1', BUFFER_SIZE);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid)
+			break;
+	}
+	mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false);
+	if (cur_mte_cxt.fault_valid)
+		return KSFT_FAIL;
+	else
+		return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+
+	/* Register SIGSEGV handler */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check an included tag value with sync mode\n");
+	evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check different included tags value with sync mode\n");
+	evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check none included tags value with sync mode\n");
+	evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check all included tags value with sync mode\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}

From dfe537cf47186bfb59db3e57eed7417cd5efde17 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Fri, 2 Oct 2020 17:26:27 +0530
Subject: [PATCH 231/262] kselftest/arm64: Check forked child mte memory
 accessibility

This test covers the mte memory behaviour of the forked process with
different mapping properties and flags. It checks that all bytes of
forked child memory are accessible with the same tag as that of the
parent and memory accessed outside the tag range causes fault to
occur.

Co-developed-by: Gabor Kertesz <gabor.kertesz@arm.com>
Signed-off-by: Gabor Kertesz <gabor.kertesz@arm.com>
Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201002115630.24683-4-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/.gitignore  |   1 +
 .../selftests/arm64/mte/check_child_memory.c  | 195 ++++++++++++++++++
 2 files changed, 196 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/mte/check_child_memory.c

diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index c3fca255d3d670..b5fcc0fb4d971a 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -1,2 +1,3 @@
 check_buffer_fill
 check_tags_inclusion
+check_child_memory
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
new file mode 100644
index 00000000000000..97bebdecd29efb
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE		(5 * MT_GRANULE_SIZE)
+#define RUNS			(MT_TAG_COUNT)
+#define UNDERFLOW		MT_GRANULE_SIZE
+#define OVERFLOW		MT_GRANULE_SIZE
+
+static size_t page_size;
+static int sizes[] = {
+	1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+static int check_child_tag_inheritance(char *ptr, int size, int mode)
+{
+	int i, parent_tag, child_tag, fault, child_status;
+	pid_t child;
+
+	parent_tag = MT_FETCH_TAG((uintptr_t)ptr);
+	fault = 0;
+
+	child = fork();
+	if (child == -1) {
+		ksft_print_msg("FAIL: child process creation\n");
+		return KSFT_FAIL;
+	} else if (child == 0) {
+		mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+		/* Do copy on write */
+		memset(ptr, '1', size);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid == true) {
+			fault = 1;
+			goto check_child_tag_inheritance_err;
+		}
+		for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+			child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+			if (parent_tag != child_tag) {
+				ksft_print_msg("FAIL: child mte tag mismatch\n");
+				fault = 1;
+				goto check_child_tag_inheritance_err;
+			}
+		}
+		mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+		memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid == false) {
+			fault = 1;
+			goto check_child_tag_inheritance_err;
+		}
+		mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+		memset(ptr + size, '3', OVERFLOW);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid == false) {
+			fault = 1;
+			goto check_child_tag_inheritance_err;
+		}
+check_child_tag_inheritance_err:
+		_exit(fault);
+	}
+	/* Wait for child process to terminate */
+	wait(&child_status);
+	if (WIFEXITED(child_status))
+		fault = WEXITSTATUS(child_status);
+	else
+		fault = 1;
+	return (fault) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int check_child_memory_mapping(int mem_type, int mode, int mapping)
+{
+	char *ptr;
+	int run, result;
+	int item = sizeof(sizes)/sizeof(int);
+
+	item = sizeof(sizes)/sizeof(int);
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < item; run++) {
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+							    UNDERFLOW, OVERFLOW);
+		if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+						 UNDERFLOW, OVERFLOW) != KSFT_PASS)
+			return KSFT_FAIL;
+		result = check_child_tag_inheritance(ptr, sizes[run], mode);
+		mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+		if (result == KSFT_FAIL)
+			return result;
+	}
+	return KSFT_PASS;
+}
+
+static int check_child_file_mapping(int mem_type, int mode, int mapping)
+{
+	char *ptr, *map_ptr;
+	int run, fd, map_size, result = KSFT_PASS;
+	int total = sizeof(sizes)/sizeof(int);
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+
+		map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+		map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+		if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		ptr = map_ptr + UNDERFLOW;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only mte enabled memory will allow tag insertion */
+		ptr = mte_insert_tags((void *)ptr, sizes[run]);
+		if (!ptr || cur_mte_cxt.fault_valid == true) {
+			ksft_print_msg("FAIL: Insert tags on file based memory\n");
+			munmap((void *)map_ptr, map_size);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		result = check_child_tag_inheritance(ptr, sizes[run], mode);
+		mte_clear_tags((void *)ptr, sizes[run]);
+		munmap((void *)map_ptr, map_size);
+		close(fd);
+		if (result != KSFT_PASS)
+			return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+	int item = sizeof(sizes)/sizeof(int);
+
+	page_size = getpagesize();
+	if (!page_size) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	sizes[item - 3] = page_size - 1;
+	sizes[item - 2] = page_size;
+	sizes[item - 1] = page_size + 1;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+
+	/* Register SIGSEGV handler */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+	mte_register_signal(SIGBUS, mte_default_handler);
+
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child anonymous memory with private mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child anonymous memory with shared mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check child anonymous memory with private mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+	evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child file memory with private mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child file memory with shared mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check child file memory with private mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check child file memory with shared mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child file memory with private mapping, precise mode and mmap/mprotect memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}

From 53ec81d232134f61806dfd93025320caa1aaf559 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Fri, 2 Oct 2020 17:26:28 +0530
Subject: [PATCH 232/262] kselftest/arm64: Verify all different mmap MTE
 options

This testcase checks the different unsupported/supported options for mmap
if used with PROT_MTE memory protection flag. These checks are,

* Either pstate.tco enable or prctl PR_MTE_TCF_NONE option should not cause
  any tag mismatch faults.
* Different combinations of anonymous/file memory mmap, mprotect,
  sync/async error mode and private/shared mappings should work.
* mprotect should not be able to clear the PROT_MTE page property.

Co-developed-by: Gabor Kertesz <gabor.kertesz@arm.com>
Signed-off-by: Gabor Kertesz <gabor.kertesz@arm.com>
Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201002115630.24683-5-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/.gitignore  |   1 +
 .../selftests/arm64/mte/check_mmap_options.c  | 262 ++++++++++++++++++
 2 files changed, 263 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/mte/check_mmap_options.c

diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index b5fcc0fb4d971a..79a215d3bbd005 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -1,3 +1,4 @@
 check_buffer_fill
 check_tags_inclusion
 check_child_memory
+check_mmap_options
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
new file mode 100644
index 00000000000000..33b13b86199b5e
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define RUNS			(MT_TAG_COUNT)
+#define UNDERFLOW		MT_GRANULE_SIZE
+#define OVERFLOW		MT_GRANULE_SIZE
+#define TAG_CHECK_ON		0
+#define TAG_CHECK_OFF		1
+
+static size_t page_size;
+static int sizes[] = {
+	1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+{
+	mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+	memset(ptr, '1', size);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid == true)
+		return KSFT_FAIL;
+
+	mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+	memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+		return KSFT_FAIL;
+	if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+		return KSFT_FAIL;
+
+	mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+	memset(ptr + size, '3', OVERFLOW);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+		return KSFT_FAIL;
+	if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+		return KSFT_FAIL;
+
+	return KSFT_PASS;
+}
+
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+	char *ptr, *map_ptr;
+	int run, result, map_size;
+	int item = sizeof(sizes)/sizeof(int);
+
+	item = sizeof(sizes)/sizeof(int);
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < item; run++) {
+		map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+		map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
+		if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
+			return KSFT_FAIL;
+
+		ptr = map_ptr + UNDERFLOW;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only mte enabled memory will allow tag insertion */
+		ptr = mte_insert_tags((void *)ptr, sizes[run]);
+		if (!ptr || cur_mte_cxt.fault_valid == true) {
+			ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n");
+			munmap((void *)map_ptr, map_size);
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+		mte_clear_tags((void *)ptr, sizes[run]);
+		mte_free_memory((void *)map_ptr, map_size, mem_type, false);
+		if (result == KSFT_FAIL)
+			return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+	char *ptr, *map_ptr;
+	int run, fd, map_size;
+	int total = sizeof(sizes)/sizeof(int);
+	int result = KSFT_PASS;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+
+		map_size = sizes[run] + UNDERFLOW + OVERFLOW;
+		map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+		if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		ptr = map_ptr + UNDERFLOW;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only mte enabled memory will allow tag insertion */
+		ptr = mte_insert_tags((void *)ptr, sizes[run]);
+		if (!ptr || cur_mte_cxt.fault_valid == true) {
+			ksft_print_msg("FAIL: Insert tags on file based memory\n");
+			munmap((void *)map_ptr, map_size);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+		mte_clear_tags((void *)ptr, sizes[run]);
+		munmap((void *)map_ptr, map_size);
+		close(fd);
+		if (result == KSFT_FAIL)
+			break;
+	}
+	return result;
+}
+
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+{
+	char *ptr, *map_ptr;
+	int run, prot_flag, result, fd, map_size;
+	int total = sizeof(sizes)/sizeof(int);
+
+	prot_flag = PROT_READ | PROT_WRITE;
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+							    UNDERFLOW, OVERFLOW);
+		if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+						 UNDERFLOW, OVERFLOW) != KSFT_PASS)
+			return KSFT_FAIL;
+		map_ptr = ptr - UNDERFLOW;
+		/* Try to clear PROT_MTE property and verify it by tag checking */
+		if (mprotect(map_ptr, map_size, prot_flag)) {
+			mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+						  UNDERFLOW, OVERFLOW);
+			ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+		mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+		if (result != KSFT_PASS)
+			return KSFT_FAIL;
+
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+		ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping,
+								 UNDERFLOW, OVERFLOW, fd);
+		if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+						 UNDERFLOW, OVERFLOW) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		map_ptr = ptr - UNDERFLOW;
+		/* Try to clear PROT_MTE property and verify it by tag checking */
+		if (mprotect(map_ptr, map_size, prot_flag)) {
+			ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+			mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+						  UNDERFLOW, OVERFLOW);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+		mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+		close(fd);
+		if (result != KSFT_PASS)
+			return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+	int item = sizeof(sizes)/sizeof(int);
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+	page_size = getpagesize();
+	if (!page_size) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	sizes[item - 3] = page_size - 1;
+	sizes[item - 2] = page_size;
+	sizes[item - 1] = page_size + 1;
+
+	/* Register signal handlers */
+	mte_register_signal(SIGBUS, mte_default_handler);
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	mte_enable_pstate_tco();
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
+
+	mte_disable_pstate_tco();
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
+
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+	evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
+	evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}

From f981d8fa26469e19d2ce73006685ae88205e914a Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Fri, 2 Oct 2020 17:26:29 +0530
Subject: [PATCH 233/262] kselftest/arm64: Verify KSM page merge for MTE pages

Add a testcase to check that KSM should not merge pages containing
same data with same/different MTE tag values.

This testcase has one positive tests and passes if page merging
happens according to the above rule. It also saves and restores
any modified ksm sysfs entries.

Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201002115630.24683-6-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/.gitignore  |   1 +
 .../selftests/arm64/mte/check_ksm_options.c   | 159 ++++++++++++++++++
 2 files changed, 160 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/mte/check_ksm_options.c

diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index 79a215d3bbd005..44e9bfdaeca61c 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -2,3 +2,4 @@ check_buffer_fill
 check_tags_inclusion
 check_child_memory
 check_mmap_options
+check_ksm_options
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
new file mode 100644
index 00000000000000..bc41ae630c86c5
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define TEST_UNIT	10
+#define PATH_KSM	"/sys/kernel/mm/ksm/"
+#define MAX_LOOP	4
+
+static size_t page_sz;
+static unsigned long ksm_sysfs[5];
+
+static unsigned long read_sysfs(char *str)
+{
+	FILE *f;
+	unsigned long val = 0;
+
+	f = fopen(str, "r");
+	if (!f) {
+		ksft_print_msg("ERR: missing %s\n", str);
+		return 0;
+	}
+	fscanf(f, "%lu", &val);
+	fclose(f);
+	return val;
+}
+
+static void write_sysfs(char *str, unsigned long val)
+{
+	FILE *f;
+
+	f = fopen(str, "w");
+	if (!f) {
+		ksft_print_msg("ERR: missing %s\n", str);
+		return;
+	}
+	fprintf(f, "%lu", val);
+	fclose(f);
+}
+
+static void mte_ksm_setup(void)
+{
+	ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes");
+	write_sysfs(PATH_KSM "merge_across_nodes", 1);
+	ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs");
+	write_sysfs(PATH_KSM "sleep_millisecs", 0);
+	ksm_sysfs[2] = read_sysfs(PATH_KSM "run");
+	write_sysfs(PATH_KSM "run", 1);
+	ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing");
+	write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT);
+	ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan");
+	write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT);
+}
+
+static void mte_ksm_restore(void)
+{
+	write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]);
+	write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]);
+	write_sysfs(PATH_KSM "run", ksm_sysfs[2]);
+	write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]);
+	write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]);
+}
+
+static void mte_ksm_scan(void)
+{
+	int cur_count = read_sysfs(PATH_KSM "full_scans");
+	int scan_count = cur_count + 1;
+	int max_loop_count = MAX_LOOP;
+
+	while ((cur_count < scan_count) && max_loop_count) {
+		sleep(1);
+		cur_count = read_sysfs(PATH_KSM "full_scans");
+		max_loop_count--;
+	}
+#ifdef DEBUG
+	ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n",
+			read_sysfs(PATH_KSM "pages_shared"),
+			read_sysfs(PATH_KSM "pages_sharing"));
+#endif
+}
+
+static int check_madvise_options(int mem_type, int mode, int mapping)
+{
+	char *ptr;
+	int err, ret;
+
+	err = KSFT_FAIL;
+	if (access(PATH_KSM, F_OK) == -1) {
+		ksft_print_msg("ERR: Kernel KSM config not enabled\n");
+		return err;
+	}
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
+	if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	/* Insert same data in all the pages */
+	memset(ptr, 'A', TEST_UNIT * page_sz);
+	ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE);
+	if (ret) {
+		ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n");
+		goto madvise_err;
+	}
+	mte_ksm_scan();
+	/* Tagged pages should not merge */
+	if ((read_sysfs(PATH_KSM "pages_shared") < 1) ||
+	    (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1)))
+		err = KSFT_PASS;
+madvise_err:
+	mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true);
+	return err;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+	page_sz = getpagesize();
+	if (!page_sz) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	/* Register signal handlers */
+	mte_register_signal(SIGBUS, mte_default_handler);
+	mte_register_signal(SIGSEGV, mte_default_handler);
+	/* Enable KSM */
+	mte_ksm_setup();
+
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check KSM mte page merge for private mapping, sync mode and mmap memory\n");
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check KSM mte page merge for private mapping, async mode and mmap memory\n");
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check KSM mte page merge for shared mapping, sync mode and mmap memory\n");
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check KSM mte page merge for shared mapping, async mode and mmap memory\n");
+
+	mte_ksm_restore();
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}

From 4dafc08d0ba4768e8540f49ab40c3ea26e40d554 Mon Sep 17 00:00:00 2001
From: Amit Daniel Kachhap <amit.kachhap@arm.com>
Date: Fri, 2 Oct 2020 17:26:30 +0530
Subject: [PATCH 234/262] kselftest/arm64: Check mte tagged user address in
 kernel

Add a testcase to check that user address with valid/invalid
mte tag works in kernel mode. This test verifies that the kernel
API's __arch_copy_from_user/__arch_copy_to_user works by considering
if the user pointer has valid/invalid allocation tags.

In MTE sync mode, file memory read/write and other similar interfaces
fails if a user memory with invalid tag is accessed in kernel. In async
mode no such failure occurs.

Signed-off-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201002115630.24683-7-amit.kachhap@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/mte/.gitignore  |   1 +
 .../selftests/arm64/mte/check_user_mem.c      | 111 ++++++++++++++++++
 .../selftests/arm64/mte/mte_common_util.h     |   1 +
 .../testing/selftests/arm64/mte/mte_helper.S  |  14 +++
 4 files changed, 127 insertions(+)
 create mode 100644 tools/testing/selftests/arm64/mte/check_user_mem.c

diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index 44e9bfdaeca61c..bc3ac63f33144a 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -3,3 +3,4 @@ check_tags_inclusion
 check_child_memory
 check_mmap_options
 check_ksm_options
+check_user_mem
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
new file mode 100644
index 00000000000000..594e98e76880a5
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+static size_t page_sz;
+
+static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+{
+	int fd, i, err;
+	char val = 'A';
+	size_t len, read_len;
+	void *ptr, *ptr_next;
+
+	err = KSFT_FAIL;
+	len = 2 * page_sz;
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	fd = create_temp_file();
+	if (fd == -1)
+		return KSFT_FAIL;
+	for (i = 0; i < len; i++)
+		write(fd, &val, sizeof(val));
+	lseek(fd, 0, 0);
+	ptr = mte_allocate_memory(len, mem_type, mapping, true);
+	if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
+		close(fd);
+		return KSFT_FAIL;
+	}
+	mte_initialize_current_context(mode, (uintptr_t)ptr, len);
+	/* Copy from file into buffer with valid tag */
+	read_len = read(fd, ptr, len);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid || read_len < len)
+		goto usermem_acc_err;
+	/* Verify same pattern is read */
+	for (i = 0; i < len; i++)
+		if (*(char *)(ptr + i) != val)
+			break;
+	if (i < len)
+		goto usermem_acc_err;
+
+	/* Tag the next half of memory with different value */
+	ptr_next = (void *)((unsigned long)ptr + page_sz);
+	ptr_next = mte_insert_new_tag(ptr_next);
+	mte_set_tag_address_range(ptr_next, page_sz);
+
+	lseek(fd, 0, 0);
+	/* Copy from file into buffer with invalid tag */
+	read_len = read(fd, ptr, len);
+	mte_wait_after_trig();
+	/*
+	 * Accessing user memory in kernel with invalid tag should fail in sync
+	 * mode without fault but may not fail in async mode as per the
+	 * implemented MTE userspace support in Arm64 kernel.
+	 */
+	if (mode == MTE_SYNC_ERR &&
+	    !cur_mte_cxt.fault_valid && read_len < len) {
+		err = KSFT_PASS;
+	} else if (mode == MTE_ASYNC_ERR &&
+		   !cur_mte_cxt.fault_valid && read_len == len) {
+		err = KSFT_PASS;
+	}
+usermem_acc_err:
+	mte_free_memory((void *)ptr, len, mem_type, true);
+	close(fd);
+	return err;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	page_sz = getpagesize();
+	if (!page_sz) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	err = mte_default_setup();
+	if (err)
+		return err;
+	/* Register signal handlers */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
+
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check memory access from kernel in async mode, private mapping and mmap memory\n");
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
index 45160e061a0e58..195a7d1879e63e 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.h
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -64,6 +64,7 @@ int create_temp_file(void);
 
 /* Assembly MTE utility functions */
 void *mte_insert_random_tag(void *ptr);
+void *mte_insert_new_tag(void *ptr);
 void *mte_get_tag_address(void *ptr);
 void mte_set_tag_address_range(void *ptr, int range);
 void mte_clear_tag_address_range(void *ptr, int range);
diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S
index 48e049fbad9a91..a02c04cd0aac98 100644
--- a/tools/testing/selftests/arm64/mte/mte_helper.S
+++ b/tools/testing/selftests/arm64/mte/mte_helper.S
@@ -26,6 +26,20 @@ ENTRY(mte_insert_random_tag)
 	ret
 ENDPROC(mte_insert_random_tag)
 
+/*
+ * mte_insert_new_tag: Insert new tag and different from the source tag if
+ *		       source pointer has it.
+ * Input:
+ *		x0 - source pointer with a tag/no-tag
+ * Return:
+ *		x0 - pointer with random tag
+ */
+ENTRY(mte_insert_new_tag)
+	gmi	x1, x0, xzr
+	irg	x0, x0, x1
+	ret
+ENDPROC(mte_insert_new_tag)
+
 /*
  * mte_get_tag_address: Get the tag from given address.
  * Input:

From 353e228eb355be5a65a3c0996c774a0f46737fda Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 5 Oct 2020 17:43:03 +0100
Subject: [PATCH 235/262] arm64: initialize per-cpu offsets earlier

The current initialization of the per-cpu offset register is difficult
to follow and this initialization is not always early enough for
upcoming instrumentation with KCSAN, where the instrumentation callbacks
use the per-cpu offset.

To make it possible to support KCSAN, and to simplify reasoning about
early bringup code, let's initialize the per-cpu offset earlier, before
we run any C code that may consume it. To do so, this patch adds a new
init_this_cpu_offset() helper that's called before the usual
primary/secondary start functions. For consistency, this is also used to
re-initialize the per-cpu offset after the runtime per-cpu areas have
been allocated (which can change CPU0's offset).

So that init_this_cpu_offset() isn't subject to any instrumentation that
might consume the per-cpu offset, it is marked with noinstr, preventing
instrumentation.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20201005164303.21389-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpu.h |  2 ++
 arch/arm64/kernel/head.S     |  3 +++
 arch/arm64/kernel/setup.c    | 12 ++++++------
 arch/arm64/kernel/smp.c      | 13 ++++++++-----
 4 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 7faae6ff3ab4d5..d9d60b18e8116f 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -68,4 +68,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info);
 void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
 				 struct cpuinfo_arm64 *boot);
 
+void init_this_cpu_offset(void);
+
 #endif /* __ASM_CPU_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 037421c66b147c..2720e6ec681409 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -452,6 +452,8 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 	bl	__pi_memset
 	dsb	ishst				// Make zero page visible to PTW
 
+	bl	init_this_cpu_offset
+
 #ifdef CONFIG_KASAN
 	bl	kasan_early_init
 #endif
@@ -758,6 +760,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	ptrauth_keys_init_cpu x2, x3, x4, x5
 #endif
 
+	bl	init_this_cpu_offset
 	b	secondary_start_kernel
 SYM_FUNC_END(__secondary_switched)
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 77c4c9bad1b852..005171972764b6 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -87,12 +87,6 @@ void __init smp_setup_processor_id(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	set_cpu_logical_map(0, mpidr);
 
-	/*
-	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
-	 * using percpu variable early, for example, lockdep will
-	 * access percpu variable inside lock_release
-	 */
-	set_my_cpu_offset(0);
 	pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
 		(unsigned long)mpidr, read_cpuid_id());
 }
@@ -282,6 +276,12 @@ u64 cpu_logical_map(int cpu)
 }
 EXPORT_SYMBOL_GPL(cpu_logical_map);
 
+void noinstr init_this_cpu_offset(void)
+{
+	unsigned int cpu = task_cpu(current);
+	set_my_cpu_offset(per_cpu_offset(cpu));
+}
+
 void __init __no_sanitize_address setup_arch(char **cmdline_p)
 {
 	init_mm.start_code = (unsigned long) _text;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 355ee9eed4dded..7714310fba2264 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -192,10 +192,7 @@ asmlinkage notrace void secondary_start_kernel(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	struct mm_struct *mm = &init_mm;
 	const struct cpu_operations *ops;
-	unsigned int cpu;
-
-	cpu = task_cpu(current);
-	set_my_cpu_offset(per_cpu_offset(cpu));
+	unsigned int cpu = smp_processor_id();
 
 	/*
 	 * All kernel threads share the same mm context; grab a
@@ -435,7 +432,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void __init smp_prepare_boot_cpu(void)
 {
-	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+	/*
+	 * Now that setup_per_cpu_areas() has allocated the runtime per-cpu
+	 * areas it is only safe to read the CPU0 boot-time area, and we must
+	 * reinitialize the offset to point to the runtime area.
+	 */
+	init_this_cpu_offset();
+
 	cpuinfo_store_boot_cpu();
 
 	/*

From ec6347bb43395cb92126788a1a5b25302543f815 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 5 Oct 2020 20:40:16 -0700
Subject: [PATCH 236/262] x86, powerpc: Rename memcpy_mcsafe() to
 copy_mc_to_{user, kernel}()

In reaction to a proposal to introduce a memcpy_mcsafe_fast()
implementation Linus points out that memcpy_mcsafe() is poorly named
relative to communicating the scope of the interface. Specifically what
addresses are valid to pass as source, destination, and what faults /
exceptions are handled.

Of particular concern is that even though x86 might be able to handle
the semantics of copy_mc_to_user() with its common copy_user_generic()
implementation other archs likely need / want an explicit path for this
case:

  On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
  >
  > On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
  > >
  > > However now I see that copy_user_generic() works for the wrong reason.
  > > It works because the exception on the source address due to poison
  > > looks no different than a write fault on the user address to the
  > > caller, it's still just a short copy. So it makes copy_to_user() work
  > > for the wrong reason relative to the name.
  >
  > Right.
  >
  > And it won't work that way on other architectures. On x86, we have a
  > generic function that can take faults on either side, and we use it
  > for both cases (and for the "in_user" case too), but that's an
  > artifact of the architecture oddity.
  >
  > In fact, it's probably wrong even on x86 - because it can hide bugs -
  > but writing those things is painful enough that everybody prefers
  > having just one function.

Replace a single top-level memcpy_mcsafe() with either
copy_mc_to_user(), or copy_mc_to_kernel().

Introduce an x86 copy_mc_fragile() name as the rename for the
low-level x86 implementation formerly named memcpy_mcsafe(). It is used
as the slow / careful backend that is supplanted by a fast
copy_mc_generic() in a follow-on patch.

One side-effect of this reorganization is that separating copy_mc_64.S
to its own file means that perf no longer needs to track dependencies
for its memcpy_64.S benchmarks.

 [ bp: Massage a bit. ]

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: <stable@vger.kernel.org>
Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com
Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com
---
 arch/powerpc/Kconfig                          |   2 +-
 arch/powerpc/include/asm/string.h             |   2 -
 arch/powerpc/include/asm/uaccess.h            |  40 ++++--
 arch/powerpc/lib/Makefile                     |   2 +-
 .../lib/{memcpy_mcsafe_64.S => copy_mc_64.S}  |   4 +-
 arch/x86/Kconfig                              |   2 +-
 arch/x86/Kconfig.debug                        |   2 +-
 arch/x86/include/asm/copy_mc_test.h           |  75 +++++++++++
 arch/x86/include/asm/mce.h                    |   9 ++
 arch/x86/include/asm/mcsafe_test.h            |  75 -----------
 arch/x86/include/asm/string_64.h              |  32 -----
 arch/x86/include/asm/uaccess.h                |   9 ++
 arch/x86/include/asm/uaccess_64.h             |  20 ---
 arch/x86/kernel/cpu/mce/core.c                |   8 +-
 arch/x86/kernel/quirks.c                      |  10 +-
 arch/x86/lib/Makefile                         |   1 +
 arch/x86/lib/copy_mc.c                        |  82 +++++++++++
 arch/x86/lib/copy_mc_64.S                     | 127 ++++++++++++++++++
 arch/x86/lib/memcpy_64.S                      | 115 ----------------
 arch/x86/lib/usercopy_64.c                    |  21 ---
 drivers/md/dm-writecache.c                    |  15 ++-
 drivers/nvdimm/claim.c                        |   2 +-
 drivers/nvdimm/pmem.c                         |   6 +-
 include/linux/string.h                        |   9 +-
 include/linux/uaccess.h                       |  13 ++
 include/linux/uio.h                           |  10 +-
 lib/Kconfig                                   |   7 +-
 lib/iov_iter.c                                |  48 ++++---
 tools/arch/x86/include/asm/mcsafe_test.h      |  13 --
 tools/arch/x86/lib/memcpy_64.S                | 115 ----------------
 tools/objtool/check.c                         |   4 +-
 tools/perf/bench/Build                        |   1 -
 tools/perf/bench/mem-memcpy-x86-64-lib.c      |  24 ----
 tools/testing/nvdimm/test/nfit.c              |  49 +++----
 .../selftests/powerpc/copyloops/.gitignore    |   2 +-
 .../selftests/powerpc/copyloops/Makefile      |   6 +-
 .../selftests/powerpc/copyloops/copy_mc_64.S  |   1 +
 .../powerpc/copyloops/memcpy_mcsafe_64.S      |   1 -
 38 files changed, 433 insertions(+), 531 deletions(-)
 rename arch/powerpc/lib/{memcpy_mcsafe_64.S => copy_mc_64.S} (98%)
 create mode 100644 arch/x86/include/asm/copy_mc_test.h
 delete mode 100644 arch/x86/include/asm/mcsafe_test.h
 create mode 100644 arch/x86/lib/copy_mc.c
 create mode 100644 arch/x86/lib/copy_mc_64.S
 delete mode 100644 tools/arch/x86/include/asm/mcsafe_test.h
 delete mode 100644 tools/perf/bench/mem-memcpy-x86-64-lib.c
 create mode 120000 tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
 delete mode 120000 tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1f48bbfb3ce99d..76473eda3426c6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -136,7 +136,7 @@ config PPC
 	select ARCH_HAS_STRICT_KERNEL_RWX	if (PPC32 && !HIBERNATION)
 	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_UACCESS_FLUSHCACHE
-	select ARCH_HAS_UACCESS_MCSAFE		if PPC64
+	select ARCH_HAS_COPY_MC			if PPC64
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_KEEP_MEMBLOCK
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 283552cd0e58ef..2aa0e31e688443 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
 #ifndef CONFIG_KASAN
 #define __HAVE_ARCH_MEMSET32
 #define __HAVE_ARCH_MEMSET64
-#define __HAVE_ARCH_MEMCPY_MCSAFE
 
-extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
 extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
 extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
 extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 00699903f1efca..20a35373cafca3 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -435,6 +435,32 @@ do {								\
 extern unsigned long __copy_tofrom_user(void __user *to,
 		const void __user *from, unsigned long size);
 
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_generic(void *to, const void *from, unsigned long size);
+
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned long size)
+{
+	return copy_mc_generic(to, from, size);
+}
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+static inline unsigned long __must_check
+copy_mc_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if (likely(check_copy_size(from, n, true))) {
+		if (access_ok(to, n)) {
+			allow_write_to_user(to, n);
+			n = copy_mc_generic((void *)to, from, n);
+			prevent_write_to_user(to, n);
+		}
+	}
+
+	return n;
+}
+#endif
+
 #ifdef __powerpc64__
 static inline unsigned long
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
@@ -523,20 +549,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 	return ret;
 }
 
-static __always_inline unsigned long __must_check
-copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
-{
-	if (likely(check_copy_size(from, n, true))) {
-		if (access_ok(to, n)) {
-			allow_write_to_user(to, n);
-			n = memcpy_mcsafe((void *)to, from, n);
-			prevent_write_to_user(to, n);
-		}
-	}
-
-	return n;
-}
-
 unsigned long __arch_clear_user(void __user *addr, unsigned long size);
 
 static inline unsigned long clear_user(void __user *addr, unsigned long size)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index d66a645503ebdc..69a91b571845d7 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
 			       memcpy_power7.o
 
 obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
-	   memcpy_64.o memcpy_mcsafe_64.o
+	   memcpy_64.o copy_mc_64.o
 
 ifndef CONFIG_PPC_QUEUED_SPINLOCKS
 obj64-$(CONFIG_SMP)	+= locks.o
diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/copy_mc_64.S
similarity index 98%
rename from arch/powerpc/lib/memcpy_mcsafe_64.S
rename to arch/powerpc/lib/copy_mc_64.S
index cb882d9a6d8a3d..88d46c471493b6 100644
--- a/arch/powerpc/lib/memcpy_mcsafe_64.S
+++ b/arch/powerpc/lib/copy_mc_64.S
@@ -50,7 +50,7 @@ err3;	stb	r0,0(r3)
 	blr
 
 
-_GLOBAL(memcpy_mcsafe)
+_GLOBAL(copy_mc_generic)
 	mr	r7,r5
 	cmpldi	r5,16
 	blt	.Lshort_copy
@@ -239,4 +239,4 @@ err1;	stb	r0,0(r3)
 15:	li	r3,0
 	blr
 
-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
+EXPORT_SYMBOL_GPL(copy_mc_generic);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7101ac64bb209d..e876b3a087f964 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -75,7 +75,7 @@ config X86
 	select ARCH_HAS_PTE_DEVMAP		if X86_64
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
-	select ARCH_HAS_UACCESS_MCSAFE		if X86_64 && X86_MCE
+	select ARCH_HAS_COPY_MC			if X86_64
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SET_DIRECT_MAP
 	select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index ee1d3c5834c62e..27b5e2bc6a0164 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -62,7 +62,7 @@ config EARLY_PRINTK_USB_XDBC
 	  You should normally say N here, unless you want to debug early
 	  crashes or need a very simple printk logging facility.
 
-config MCSAFE_TEST
+config COPY_MC_TEST
 	def_bool n
 
 config EFI_PGT_DUMP
diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h
new file mode 100644
index 00000000000000..e4991ba967266e
--- /dev/null
+++ b/arch/x86/include/asm/copy_mc_test.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _COPY_MC_TEST_H_
+#define _COPY_MC_TEST_H_
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_COPY_MC_TEST
+extern unsigned long copy_mc_test_src;
+extern unsigned long copy_mc_test_dst;
+
+static inline void copy_mc_inject_src(void *addr)
+{
+	if (addr)
+		copy_mc_test_src = (unsigned long) addr;
+	else
+		copy_mc_test_src = ~0UL;
+}
+
+static inline void copy_mc_inject_dst(void *addr)
+{
+	if (addr)
+		copy_mc_test_dst = (unsigned long) addr;
+	else
+		copy_mc_test_dst = ~0UL;
+}
+#else /* CONFIG_COPY_MC_TEST */
+static inline void copy_mc_inject_src(void *addr)
+{
+}
+
+static inline void copy_mc_inject_dst(void *addr)
+{
+}
+#endif /* CONFIG_COPY_MC_TEST */
+
+#else /* __ASSEMBLY__ */
+#include <asm/export.h>
+
+#ifdef CONFIG_COPY_MC_TEST
+.macro COPY_MC_TEST_CTL
+	.pushsection .data
+	.align 8
+	.globl copy_mc_test_src
+	copy_mc_test_src:
+		.quad 0
+	EXPORT_SYMBOL_GPL(copy_mc_test_src)
+	.globl copy_mc_test_dst
+	copy_mc_test_dst:
+		.quad 0
+	EXPORT_SYMBOL_GPL(copy_mc_test_dst)
+	.popsection
+.endm
+
+.macro COPY_MC_TEST_SRC reg count target
+	leaq \count(\reg), %r9
+	cmp copy_mc_test_src, %r9
+	ja \target
+.endm
+
+.macro COPY_MC_TEST_DST reg count target
+	leaq \count(\reg), %r9
+	cmp copy_mc_test_dst, %r9
+	ja \target
+.endm
+#else
+.macro COPY_MC_TEST_CTL
+.endm
+
+.macro COPY_MC_TEST_SRC reg count target
+.endm
+
+.macro COPY_MC_TEST_DST reg count target
+.endm
+#endif /* CONFIG_COPY_MC_TEST */
+#endif /* __ASSEMBLY__ */
+#endif /* _COPY_MC_TEST_H_ */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 109af5c7f51546..ba2062d6df92a8 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -174,6 +174,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
 extern int mce_p5_enabled;
 
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+extern void enable_copy_mc_fragile(void);
+unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
+#else
+static inline void enable_copy_mc_fragile(void)
+{
+}
+#endif
+
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index eb59804b6201c3..00000000000000
--- a/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_MCSAFE_TEST
-extern unsigned long mcsafe_test_src;
-extern unsigned long mcsafe_test_dst;
-
-static inline void mcsafe_inject_src(void *addr)
-{
-	if (addr)
-		mcsafe_test_src = (unsigned long) addr;
-	else
-		mcsafe_test_src = ~0UL;
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
-	if (addr)
-		mcsafe_test_dst = (unsigned long) addr;
-	else
-		mcsafe_test_dst = ~0UL;
-}
-#else /* CONFIG_MCSAFE_TEST */
-static inline void mcsafe_inject_src(void *addr)
-{
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
-}
-#endif /* CONFIG_MCSAFE_TEST */
-
-#else /* __ASSEMBLY__ */
-#include <asm/export.h>
-
-#ifdef CONFIG_MCSAFE_TEST
-.macro MCSAFE_TEST_CTL
-	.pushsection .data
-	.align 8
-	.globl mcsafe_test_src
-	mcsafe_test_src:
-		.quad 0
-	EXPORT_SYMBOL_GPL(mcsafe_test_src)
-	.globl mcsafe_test_dst
-	mcsafe_test_dst:
-		.quad 0
-	EXPORT_SYMBOL_GPL(mcsafe_test_dst)
-	.popsection
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-	leaq \count(\reg), %r9
-	cmp mcsafe_test_src, %r9
-	ja \target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-	leaq \count(\reg), %r9
-	cmp mcsafe_test_dst, %r9
-	ja \target
-.endm
-#else
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* CONFIG_MCSAFE_TEST */
-#endif /* __ASSEMBLY__ */
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 75314c3dbe471e..6e450827f677a9 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct);
 
 #endif
 
-#define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
-		size_t cnt);
-DECLARE_STATIC_KEY_FALSE(mcsafe_key);
-
-/**
- * memcpy_mcsafe - copy memory with indication if a machine check happened
- *
- * @dst:	destination address
- * @src:	source address
- * @cnt:	number of bytes to copy
- *
- * Low level memory copy function that catches machine checks
- * We only call into the "safe" function on systems that can
- * actually do machine check recovery. Everyone else can just
- * use memcpy().
- *
- * Return 0 for success, or number of bytes not copied if there was an
- * exception.
- */
-static __always_inline __must_check unsigned long
-memcpy_mcsafe(void *dst, const void *src, size_t cnt)
-{
-#ifdef CONFIG_X86_MCE
-	if (static_branch_unlikely(&mcsafe_key))
-		return __memcpy_mcsafe(dst, src, cnt);
-	else
-#endif
-		memcpy(dst, src, cnt);
-	return 0;
-}
-
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
 void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index ecefaffd15d4c8..eff7fb84714985 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n);
 unsigned long __must_check clear_user(void __user *mem, unsigned long len);
 unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
 
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned len);
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+unsigned long __must_check
+copy_mc_to_user(void *to, const void *from, unsigned len);
+#endif
+
 /*
  * movsl can be slow when source and dest are not both 8-byte aligned
  */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index bc10e3dc64fed7..e7265a552f4f0c 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len)
 	return ret;
 }
 
-static __always_inline __must_check unsigned long
-copy_to_user_mcsafe(void *to, const void *from, unsigned len)
-{
-	unsigned long ret;
-
-	__uaccess_begin();
-	/*
-	 * Note, __memcpy_mcsafe() is explicitly used since it can
-	 * handle exceptions / faults.  memcpy_mcsafe() may fall back to
-	 * memcpy() which lacks this handling.
-	 */
-	ret = __memcpy_mcsafe(to, from, len);
-	__uaccess_end();
-	return ret;
-}
-
 static __always_inline __must_check unsigned long
 raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
 {
@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
 	kasan_check_write(dst, size);
 	return __copy_user_flushcache(dst, src, size);
 }
-
-unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len);
-
 #endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 11b6697be01087..b5b70f4b351dae 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -40,7 +40,6 @@
 #include <linux/debugfs.h>
 #include <linux/irq_work.h>
 #include <linux/export.h>
-#include <linux/jump_label.h>
 #include <linux/set_memory.h>
 #include <linux/sync_core.h>
 #include <linux/task_work.h>
@@ -2124,7 +2123,7 @@ void mce_disable_bank(int bank)
 	and older.
  * mce=nobootlog Don't log MCEs from before booting.
  * mce=bios_cmci_threshold Don't program the CMCI threshold
- * mce=recovery force enable memcpy_mcsafe()
+ * mce=recovery force enable copy_mc_fragile()
  */
 static int __init mcheck_enable(char *str)
 {
@@ -2732,13 +2731,10 @@ static void __init mcheck_debugfs_init(void)
 static void __init mcheck_debugfs_init(void) { }
 #endif
 
-DEFINE_STATIC_KEY_FALSE(mcsafe_key);
-EXPORT_SYMBOL_GPL(mcsafe_key);
-
 static int __init mcheck_late_init(void)
 {
 	if (mca_cfg.recovery)
-		static_branch_inc(&mcsafe_key);
+		enable_copy_mc_fragile();
 
 	mcheck_debugfs_init();
 
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 1b10717c9321b6..6d0df6a58873d7 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -8,6 +8,7 @@
 
 #include <asm/hpet.h>
 #include <asm/setup.h>
+#include <asm/mce.h>
 
 #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
 
@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
 			amd_disable_seq_and_redirect_scrub);
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
-#include <linux/jump_label.h>
-#include <asm/string_64.h>
-
 /* Ivy Bridge, Haswell, Broadwell */
 static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
 {
@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
 	pci_read_config_dword(pdev, 0x84, &capid0);
 
 	if (capid0 & 0x10)
-		static_branch_inc(&mcsafe_key);
+		enable_copy_mc_fragile();
 }
 
 /* Skylake */
@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
 	 * enabled, so memory machine check recovery is also enabled.
 	 */
 	if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
-		static_branch_inc(&mcsafe_key);
+		enable_copy_mc_fragile();
 
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
 #endif
-#endif
 
 bool x86_apple_machine;
 EXPORT_SYMBOL(x86_apple_machine);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index d46fff11f06fa3..f7d23c9c22b2f8 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 lib-y := delay.o misc.o cmdline.o cpu.o
 lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
+lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION)	+= error-inject.o
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
new file mode 100644
index 00000000000000..2633635530b7b4
--- /dev/null
+++ b/arch/x86/lib/copy_mc.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/mce.h>
+
+#ifdef CONFIG_X86_MCE
+/*
+ * See COPY_MC_TEST for self-test of the copy_mc_fragile()
+ * implementation.
+ */
+static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
+
+void enable_copy_mc_fragile(void)
+{
+	static_branch_inc(&copy_mc_fragile_key);
+}
+#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
+
+/*
+ * Similar to copy_user_handle_tail, probe for the write fault point, or
+ * source exception point.
+ */
+__visible notrace unsigned long
+copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
+{
+	for (; len; --len, to++, from++)
+		if (copy_mc_fragile(to, from, 1))
+			break;
+	return len;
+}
+#else
+/*
+ * No point in doing careful copying, or consulting a static key when
+ * there is no #MC handler in the CONFIG_X86_MCE=n case.
+ */
+void enable_copy_mc_fragile(void)
+{
+}
+#define copy_mc_fragile_enabled (0)
+#endif
+
+/**
+ * copy_mc_to_kernel - memory copy that handles source exceptions
+ *
+ * @dst:	destination address
+ * @src:	source address
+ * @len:	number of bytes to copy
+ *
+ * Call into the 'fragile' version on systems that have trouble
+ * actually do machine check recovery. Everyone else can just
+ * use memcpy().
+ *
+ * Return 0 for success, or number of bytes not copied if there was an
+ * exception.
+ */
+unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
+{
+	if (copy_mc_fragile_enabled)
+		return copy_mc_fragile(dst, src, len);
+	memcpy(dst, src, len);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
+
+unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
+{
+	unsigned long ret;
+
+	if (!copy_mc_fragile_enabled)
+		return copy_user_generic(dst, src, len);
+
+	__uaccess_begin();
+	ret = copy_mc_fragile(dst, src, len);
+	__uaccess_end();
+	return ret;
+}
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
new file mode 100644
index 00000000000000..c3b613c4544a37
--- /dev/null
+++ b/arch/x86/lib/copy_mc_64.S
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/linkage.h>
+#include <asm/copy_mc_test.h>
+#include <asm/export.h>
+#include <asm/asm.h>
+
+#ifndef CONFIG_UML
+
+#ifdef CONFIG_X86_MCE
+COPY_MC_TEST_CTL
+
+/*
+ * copy_mc_fragile - copy memory with indication if an exception / fault happened
+ *
+ * The 'fragile' version is opted into by platform quirks and takes
+ * pains to avoid unrecoverable corner cases like 'fast-string'
+ * instruction sequences, and consuming poison across a cacheline
+ * boundary. The non-fragile version is equivalent to memcpy()
+ * regardless of CPU machine-check-recovery capability.
+ */
+SYM_FUNC_START(copy_mc_fragile)
+	cmpl $8, %edx
+	/* Less than 8 bytes? Go to byte copy loop */
+	jb .L_no_whole_words
+
+	/* Check for bad alignment of source */
+	testl $7, %esi
+	/* Already aligned */
+	jz .L_8byte_aligned
+
+	/* Copy one byte at a time until source is 8-byte aligned */
+	movl %esi, %ecx
+	andl $7, %ecx
+	subl $8, %ecx
+	negl %ecx
+	subl %ecx, %edx
+.L_read_leading_bytes:
+	movb (%rsi), %al
+	COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
+	COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
+.L_write_leading_bytes:
+	movb %al, (%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz .L_read_leading_bytes
+
+.L_8byte_aligned:
+	movl %edx, %ecx
+	andl $7, %edx
+	shrl $3, %ecx
+	jz .L_no_whole_words
+
+.L_read_words:
+	movq (%rsi), %r8
+	COPY_MC_TEST_SRC %rsi 8 .E_read_words
+	COPY_MC_TEST_DST %rdi 8 .E_write_words
+.L_write_words:
+	movq %r8, (%rdi)
+	addq $8, %rsi
+	addq $8, %rdi
+	decl %ecx
+	jnz .L_read_words
+
+	/* Any trailing bytes? */
+.L_no_whole_words:
+	andl %edx, %edx
+	jz .L_done_memcpy_trap
+
+	/* Copy trailing bytes */
+	movl %edx, %ecx
+.L_read_trailing_bytes:
+	movb (%rsi), %al
+	COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
+	COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
+.L_write_trailing_bytes:
+	movb %al, (%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz .L_read_trailing_bytes
+
+	/* Copy successful. Return zero */
+.L_done_memcpy_trap:
+	xorl %eax, %eax
+.L_done:
+	ret
+SYM_FUNC_END(copy_mc_fragile)
+EXPORT_SYMBOL_GPL(copy_mc_fragile)
+
+	.section .fixup, "ax"
+	/*
+	 * Return number of bytes not copied for any failure. Note that
+	 * there is no "tail" handling since the source buffer is 8-byte
+	 * aligned and poison is cacheline aligned.
+	 */
+.E_read_words:
+	shll	$3, %ecx
+.E_leading_bytes:
+	addl	%edx, %ecx
+.E_trailing_bytes:
+	mov	%ecx, %eax
+	jmp	.L_done
+
+	/*
+	 * For write fault handling, given the destination is unaligned,
+	 * we handle faults on multi-byte writes with a byte-by-byte
+	 * copy up to the write-protected page.
+	 */
+.E_write_words:
+	shll	$3, %ecx
+	addl	%edx, %ecx
+	movl	%ecx, %edx
+	jmp copy_mc_fragile_handle_tail
+
+	.previous
+
+	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+	_ASM_EXTABLE(.L_write_words, .E_write_words)
+	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+#endif /* CONFIG_X86_MCE */
+#endif /* !CONFIG_UML */
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index bbcc05bcefadb2..037faac46b0cc9 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 
@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
 SYM_FUNC_END(memcpy_orig)
 
 .popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
-	cmpl $8, %edx
-	/* Less than 8 bytes? Go to byte copy loop */
-	jb .L_no_whole_words
-
-	/* Check for bad alignment of source */
-	testl $7, %esi
-	/* Already aligned */
-	jz .L_8byte_aligned
-
-	/* Copy one byte at a time until source is 8-byte aligned */
-	movl %esi, %ecx
-	andl $7, %ecx
-	subl $8, %ecx
-	negl %ecx
-	subl %ecx, %edx
-.L_read_leading_bytes:
-	movb (%rsi), %al
-	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
-	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
-	movb %al, (%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
-	movl %edx, %ecx
-	andl $7, %edx
-	shrl $3, %ecx
-	jz .L_no_whole_words
-
-.L_read_words:
-	movq (%rsi), %r8
-	MCSAFE_TEST_SRC %rsi 8 .E_read_words
-	MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
-	movq %r8, (%rdi)
-	addq $8, %rsi
-	addq $8, %rdi
-	decl %ecx
-	jnz .L_read_words
-
-	/* Any trailing bytes? */
-.L_no_whole_words:
-	andl %edx, %edx
-	jz .L_done_memcpy_trap
-
-	/* Copy trailing bytes */
-	movl %edx, %ecx
-.L_read_trailing_bytes:
-	movb (%rsi), %al
-	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
-	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
-	movb %al, (%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_read_trailing_bytes
-
-	/* Copy successful. Return zero */
-.L_done_memcpy_trap:
-	xorl %eax, %eax
-.L_done:
-	ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
-	.section .fixup, "ax"
-	/*
-	 * Return number of bytes not copied for any failure. Note that
-	 * there is no "tail" handling since the source buffer is 8-byte
-	 * aligned and poison is cacheline aligned.
-	 */
-.E_read_words:
-	shll	$3, %ecx
-.E_leading_bytes:
-	addl	%edx, %ecx
-.E_trailing_bytes:
-	mov	%ecx, %eax
-	jmp	.L_done
-
-	/*
-	 * For write fault handling, given the destination is unaligned,
-	 * we handle faults on multi-byte writes with a byte-by-byte
-	 * copy up to the write-protected page.
-	 */
-.E_write_words:
-	shll	$3, %ecx
-	addl	%edx, %ecx
-	movl	%ecx, %edx
-	jmp mcsafe_handle_tail
-
-	.previous
-
-	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
-	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
-	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
-	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
-	_ASM_EXTABLE(.L_write_words, .E_write_words)
-	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index b0dfac3d3df712..5f1d4a9ebd5a16 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
 }
 EXPORT_SYMBOL(clear_user);
 
-/*
- * Similar to copy_user_handle_tail, probe for the write fault point,
- * but reuse __memcpy_mcsafe in case a new read error is encountered.
- * clac() is handled in _copy_to_iter_mcsafe().
- */
-__visible notrace unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
-	for (; len; --len, to++, from++) {
-		/*
-		 * Call the assembly routine back directly since
-		 * memcpy_mcsafe() may silently fallback to memcpy.
-		 */
-		unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
-		if (rem)
-			break;
-	}
-	return len;
-}
-
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 /**
  * clean_cache_range - write back a cache range with CLWB
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 86dbe0c8b45c60..9fc18fadacc691 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -49,7 +49,7 @@ do {								\
 #define pmem_assign(dest, src)	((dest) = (src))
 #endif
 
-#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM)
+#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM)
 #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
 #endif
 
@@ -984,7 +984,8 @@ static void writecache_resume(struct dm_target *ti)
 	}
 	wc->freelist_size = 0;
 
-	r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t));
+	r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count,
+			      sizeof(uint64_t));
 	if (r) {
 		writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
 		sb_seq_count = cpu_to_le64(0);
@@ -1000,7 +1001,8 @@ static void writecache_resume(struct dm_target *ti)
 			e->seq_count = -1;
 			continue;
 		}
-		r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry));
+		r = copy_mc_to_kernel(&wme, memory_entry(wc, e),
+				      sizeof(struct wc_memory_entry));
 		if (r) {
 			writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d",
 					 (unsigned long)b, r);
@@ -1198,7 +1200,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
 
 		if (rw == READ) {
 			int r;
-			r = memcpy_mcsafe(buf, data, size);
+			r = copy_mc_to_kernel(buf, data, size);
 			flush_dcache_page(bio_page(bio));
 			if (unlikely(r)) {
 				writecache_error(wc, r, "hardware memory error when reading data: %d", r);
@@ -2341,7 +2343,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		}
 	}
 
-	r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+	r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock));
 	if (r) {
 		ti->error = "Hardware memory error when reading superblock";
 		goto bad;
@@ -2352,7 +2354,8 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
 			ti->error = "Unable to initialize device";
 			goto bad;
 		}
-		r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+		r = copy_mc_to_kernel(&s, sb(wc),
+				      sizeof(struct wc_memory_superblock));
 		if (r) {
 			ti->error = "Hardware memory error when reading superblock";
 			goto bad;
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 45964acba9443e..22d865ba6353d2 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 	if (rw == READ) {
 		if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
 			return -EIO;
-		if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
+		if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
 			return -EIO;
 		return 0;
 	}
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index fab29b514372d0..5c6939e004e2d8 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
 	while (len) {
 		mem = kmap_atomic(page);
 		chunk = min_t(unsigned int, len, PAGE_SIZE - off);
-		rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
+		rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
 		kunmap_atomic(mem);
 		if (rem)
 			return BLK_STS_IOERR;
@@ -304,7 +304,7 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
 
 /*
  * Use the 'no check' versions of copy_from_iter_flushcache() and
- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
+ * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
  * checking, both file offset and device offset, is handled by
  * dax_iomap_actor()
  */
@@ -317,7 +317,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 		void *addr, size_t bytes, struct iov_iter *i)
 {
-	return _copy_to_iter_mcsafe(addr, bytes, i);
+	return _copy_mc_to_iter(addr, bytes, i);
 }
 
 static const struct dax_operations pmem_dax_ops = {
diff --git a/include/linux/string.h b/include/linux/string.h
index 9b7a0632e87aac..b1f3894a0a3e4c 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -161,20 +161,13 @@ extern int bcmp(const void *,const void *,__kernel_size_t);
 #ifndef __HAVE_ARCH_MEMCHR
 extern void * memchr(const void *,int,__kernel_size_t);
 #endif
-#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
-static inline __must_check unsigned long memcpy_mcsafe(void *dst,
-		const void *src, size_t cnt)
-{
-	memcpy(dst, src, cnt);
-	return 0;
-}
-#endif
 #ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
 static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
 {
 	memcpy(dst, src, cnt);
 }
 #endif
+
 void *memchr_inv(const void *s, int c, size_t n);
 char *strreplace(char *s, char old, char new);
 
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 94b28541165929..1ae36bc8db351d 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -179,6 +179,19 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
 }
 #endif
 
+#ifndef copy_mc_to_kernel
+/*
+ * Without arch opt-in this generic copy_mc_to_kernel() will not handle
+ * #MC (or arch equivalent) during source read.
+ */
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *dst, const void *src, size_t cnt)
+{
+	memcpy(dst, src, cnt);
+	return 0;
+}
+#endif
+
 static __always_inline void pagefault_disabled_inc(void)
 {
 	current->pagefault_disabled++;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3835a8a8e9eae0..f14410c678bd50 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -185,10 +185,10 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
 #define _copy_from_iter_flushcache _copy_from_iter_nocache
 #endif
 
-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i);
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 #else
-#define _copy_to_iter_mcsafe _copy_to_iter
+#define _copy_mc_to_iter _copy_to_iter
 #endif
 
 static __always_inline __must_check
@@ -201,12 +201,12 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 }
 
 static __always_inline __must_check
-size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
+size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, true)))
 		return 0;
 	else
-		return _copy_to_iter_mcsafe(addr, bytes, i);
+		return _copy_mc_to_iter(addr, bytes, i);
 }
 
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
diff --git a/lib/Kconfig b/lib/Kconfig
index b4b98a03ff987c..b46a9fd122c81a 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -635,7 +635,12 @@ config UACCESS_MEMCPY
 config ARCH_HAS_UACCESS_FLUSHCACHE
 	bool
 
-config ARCH_HAS_UACCESS_MCSAFE
+# arch has a concept of a recoverable synchronous exception due to a
+# memory-read error like x86 machine-check or ARM data-abort, and
+# implements copy_mc_to_{user,kernel} to abort and report
+# 'bytes-transferred' if that exception fires when accessing the source
+# buffer.
+config ARCH_HAS_COPY_MC
 	bool
 
 # Temporary. Goes away when all archs are cleaned up
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5e40786c8f1232..d13304a034f5ee 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -637,30 +637,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(_copy_to_iter);
 
-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
-static int copyout_mcsafe(void __user *to, const void *from, size_t n)
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+static int copyout_mc(void __user *to, const void *from, size_t n)
 {
 	if (access_ok(to, n)) {
 		instrument_copy_to_user(to, from, n);
-		n = copy_to_user_mcsafe((__force void *) to, from, n);
+		n = copy_mc_to_user((__force void *) to, from, n);
 	}
 	return n;
 }
 
-static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
+static unsigned long copy_mc_to_page(struct page *page, size_t offset,
 		const char *from, size_t len)
 {
 	unsigned long ret;
 	char *to;
 
 	to = kmap_atomic(page);
-	ret = memcpy_mcsafe(to + offset, from, len);
+	ret = copy_mc_to_kernel(to + offset, from, len);
 	kunmap_atomic(to);
 
 	return ret;
 }
 
-static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
+static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
 				struct iov_iter *i)
 {
 	struct pipe_inode_info *pipe = i->pipe;
@@ -678,7 +678,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
 		unsigned long rem;
 
-		rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
+		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
 					    off, addr, chunk);
 		i->head = i_head;
 		i->iov_offset = off + chunk - rem;
@@ -695,18 +695,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
 }
 
 /**
- * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
+ * _copy_mc_to_iter - copy to iter with source memory error exception handling
  * @addr: source kernel address
  * @bytes: total transfer length
  * @iter: destination iterator
  *
- * The pmem driver arranges for filesystem-dax to use this facility via
- * dax_copy_to_iter() for protecting read/write to persistent memory.
- * Unless / until an architecture can guarantee identical performance
- * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
- * performance regression to switch more users to the mcsafe version.
+ * The pmem driver deploys this for the dax operation
+ * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
+ * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
+ * successfully copied.
  *
- * Otherwise, the main differences between this and typical _copy_to_iter().
+ * The main differences between this and typical _copy_to_iter().
  *
  * * Typical tail/residue handling after a fault retries the copy
  *   byte-by-byte until the fault happens again. Re-triggering machine
@@ -717,23 +716,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
  *   a short copy.
- *
- * See MCSAFE_TEST for self-test.
  */
-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
+size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	const char *from = addr;
 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
 
 	if (unlikely(iov_iter_is_pipe(i)))
-		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
+		return copy_mc_pipe_to_iter(addr, bytes, i);
 	if (iter_is_iovec(i))
 		might_fault();
 	iterate_and_advance(i, bytes, v,
-		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
+			   v.iov_len),
 		({
-		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
-                               (from += v.bv_len) - v.bv_len, v.bv_len);
+		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
+				      (from += v.bv_len) - v.bv_len, v.bv_len);
 		if (rem) {
 			curr_addr = (unsigned long) from;
 			bytes = curr_addr - s_addr - rem;
@@ -741,8 +739,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
 		}
 		}),
 		({
-		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
-				v.iov_len);
+		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
+					- v.iov_len, v.iov_len);
 		if (rem) {
 			curr_addr = (unsigned long) from;
 			bytes = curr_addr - s_addr - rem;
@@ -753,8 +751,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
-#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
+EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
+#endif /* CONFIG_ARCH_HAS_COPY_MC */
 
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index 2ccd588fbad455..00000000000000
--- a/tools/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 45f8e1b02241f2..0b5b8ae56bd917 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 
@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
 SYM_FUNC_END(memcpy_orig)
 
 .popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
-	cmpl $8, %edx
-	/* Less than 8 bytes? Go to byte copy loop */
-	jb .L_no_whole_words
-
-	/* Check for bad alignment of source */
-	testl $7, %esi
-	/* Already aligned */
-	jz .L_8byte_aligned
-
-	/* Copy one byte at a time until source is 8-byte aligned */
-	movl %esi, %ecx
-	andl $7, %ecx
-	subl $8, %ecx
-	negl %ecx
-	subl %ecx, %edx
-.L_read_leading_bytes:
-	movb (%rsi), %al
-	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
-	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
-	movb %al, (%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
-	movl %edx, %ecx
-	andl $7, %edx
-	shrl $3, %ecx
-	jz .L_no_whole_words
-
-.L_read_words:
-	movq (%rsi), %r8
-	MCSAFE_TEST_SRC %rsi 8 .E_read_words
-	MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
-	movq %r8, (%rdi)
-	addq $8, %rsi
-	addq $8, %rdi
-	decl %ecx
-	jnz .L_read_words
-
-	/* Any trailing bytes? */
-.L_no_whole_words:
-	andl %edx, %edx
-	jz .L_done_memcpy_trap
-
-	/* Copy trailing bytes */
-	movl %edx, %ecx
-.L_read_trailing_bytes:
-	movb (%rsi), %al
-	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
-	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
-	movb %al, (%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_read_trailing_bytes
-
-	/* Copy successful. Return zero */
-.L_done_memcpy_trap:
-	xorl %eax, %eax
-.L_done:
-	ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
-	.section .fixup, "ax"
-	/*
-	 * Return number of bytes not copied for any failure. Note that
-	 * there is no "tail" handling since the source buffer is 8-byte
-	 * aligned and poison is cacheline aligned.
-	 */
-.E_read_words:
-	shll	$3, %ecx
-.E_leading_bytes:
-	addl	%edx, %ecx
-.E_trailing_bytes:
-	mov	%ecx, %eax
-	jmp	.L_done
-
-	/*
-	 * For write fault handling, given the destination is unaligned,
-	 * we handle faults on multi-byte writes with a byte-by-byte
-	 * copy up to the write-protected page.
-	 */
-.E_write_words:
-	shll	$3, %ecx
-	addl	%edx, %ecx
-	movl	%ecx, %edx
-	jmp mcsafe_handle_tail
-
-	.previous
-
-	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
-	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
-	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
-	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
-	_ASM_EXTABLE(.L_write_words, .E_write_words)
-	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index e034a8f24f4668..893f021fec639b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -548,8 +548,8 @@ static const char *uaccess_safe_builtin[] = {
 	"__ubsan_handle_shift_out_of_bounds",
 	/* misc */
 	"csum_partial_copy_generic",
-	"__memcpy_mcsafe",
-	"mcsafe_handle_tail",
+	"copy_mc_fragile",
+	"copy_mc_fragile_handle_tail",
 	"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
 	NULL
 };
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index dd68a40a790c59..878db6a59a4103 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -13,7 +13,6 @@ perf-y += synthesize.o
 perf-y += kallsyms-parse.o
 perf-y += find-bit-bench.o
 
-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
 
diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
deleted file mode 100644
index 4130734dde84b2..00000000000000
--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
- * happy.
- */
-#include <linux/types.h>
-
-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
-	for (; len; --len, to++, from++) {
-		/*
-		 * Call the assembly routine back directly since
-		 * memcpy_mcsafe() may silently fallback to memcpy.
-		 */
-		unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
-		if (rem)
-			break;
-	}
-	return len;
-}
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a1a5dc645b4011..2ac0fff6dad825 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,8 @@
 #include "nfit_test.h"
 #include "../watermark.h"
 
-#include <asm/mcsafe_test.h>
+#include <asm/copy_mc_test.h>
+#include <asm/mce.h>
 
 /*
  * Generate an NFIT table to describe the following topology:
@@ -3283,7 +3284,7 @@ static struct platform_driver nfit_test_driver = {
 	.id_table = nfit_test_id,
 };
 
-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
 
 enum INJECT {
 	INJECT_NONE,
@@ -3291,7 +3292,7 @@ enum INJECT {
 	INJECT_DST,
 };
 
-static void mcsafe_test_init(char *dst, char *src, size_t size)
+static void copy_mc_test_init(char *dst, char *src, size_t size)
 {
 	size_t i;
 
@@ -3300,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
 		src[i] = (char) i;
 }
 
-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
 		size_t size, unsigned long rem)
 {
 	size_t i;
@@ -3321,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
 	return true;
 }
 
-void mcsafe_test(void)
+void copy_mc_test(void)
 {
 	char *inject_desc[] = { "none", "source", "destination" };
 	enum INJECT inj;
 
-	if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+	if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
 		pr_info("%s: run...\n", __func__);
 	} else {
 		pr_info("%s: disabled, skip.\n", __func__);
@@ -3344,31 +3345,31 @@ void mcsafe_test(void)
 
 			switch (inj) {
 			case INJECT_NONE:
-				mcsafe_inject_src(NULL);
-				mcsafe_inject_dst(NULL);
-				dst = &mcsafe_buf[2048];
-				src = &mcsafe_buf[1024 - i];
+				copy_mc_inject_src(NULL);
+				copy_mc_inject_dst(NULL);
+				dst = &copy_mc_buf[2048];
+				src = &copy_mc_buf[1024 - i];
 				expect = 0;
 				break;
 			case INJECT_SRC:
-				mcsafe_inject_src(&mcsafe_buf[1024]);
-				mcsafe_inject_dst(NULL);
-				dst = &mcsafe_buf[2048];
-				src = &mcsafe_buf[1024 - i];
+				copy_mc_inject_src(&copy_mc_buf[1024]);
+				copy_mc_inject_dst(NULL);
+				dst = &copy_mc_buf[2048];
+				src = &copy_mc_buf[1024 - i];
 				expect = 512 - i;
 				break;
 			case INJECT_DST:
-				mcsafe_inject_src(NULL);
-				mcsafe_inject_dst(&mcsafe_buf[2048]);
-				dst = &mcsafe_buf[2048 - i];
-				src = &mcsafe_buf[1024];
+				copy_mc_inject_src(NULL);
+				copy_mc_inject_dst(&copy_mc_buf[2048]);
+				dst = &copy_mc_buf[2048 - i];
+				src = &copy_mc_buf[1024];
 				expect = 512 - i;
 				break;
 			}
 
-			mcsafe_test_init(dst, src, 512);
-			rem = __memcpy_mcsafe(dst, src, 512);
-			valid = mcsafe_test_validate(dst, src, 512, expect);
+			copy_mc_test_init(dst, src, 512);
+			rem = copy_mc_fragile(dst, src, 512);
+			valid = copy_mc_test_validate(dst, src, 512, expect);
 			if (rem == expect && valid)
 				continue;
 			pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
@@ -3380,8 +3381,8 @@ void mcsafe_test(void)
 		}
 	}
 
-	mcsafe_inject_src(NULL);
-	mcsafe_inject_dst(NULL);
+	copy_mc_inject_src(NULL);
+	copy_mc_inject_dst(NULL);
 }
 
 static __init int nfit_test_init(void)
@@ -3392,7 +3393,7 @@ static __init int nfit_test_init(void)
 	libnvdimm_test();
 	acpi_nfit_test();
 	device_dax_test();
-	mcsafe_test();
+	copy_mc_test();
 	dax_pmem_test();
 	dax_pmem_core_test();
 #ifdef CONFIG_DEV_DAX_PMEM_COMPAT
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ddaf140b825539..994b11af765cef 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -12,4 +12,4 @@ memcpy_p7_t1
 copyuser_64_exc_t0
 copyuser_64_exc_t1
 copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983a1c781d..3095b1f1c02b32 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
 		copyuser_p7_t0 copyuser_p7_t1 \
 		memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
-		memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
+		memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
 		copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
 
 EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%:	memcpy_power7.S $(EXTRA_SOURCES)
 		-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
 		-o $@ $^
 
-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
 	$(CC) $(CPPFLAGS) $(CFLAGS) \
-		-D COPY_LOOP=test_memcpy_mcsafe \
+		-D COPY_LOOP=test_copy_mc_generic \
 		-o $@ $^
 
 $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 120000
index 00000000000000..dcbe06d500fb2a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copy_mc_64.S
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
deleted file mode 120000
index f0feef3062f63d..00000000000000
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S
\ No newline at end of file

From 5da8e4a658109e3b7e1f45ae672b7c06ac3e7158 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 5 Oct 2020 20:40:25 -0700
Subject: [PATCH 237/262] x86/copy_mc: Introduce copy_mc_enhanced_fast_string()

The motivations to go rework memcpy_mcsafe() are that the benefit of
doing slow and careful copies is obviated on newer CPUs, and that the
current opt-in list of CPUs to instrument recovery is broken relative to
those CPUs.  There is no need to keep an opt-in list up to date on an
ongoing basis if pmem/dax operations are instrumented for recovery by
default. With recovery enabled by default the old "mcsafe_key" opt-in to
careful copying can be made a "fragile" opt-out. Where the "fragile"
list takes steps to not consume poison across cachelines.

The discussion with Linus made clear that the current "_mcsafe" suffix
was imprecise to a fault. The operations that are needed by pmem/dax are
to copy from a source address that might throw #MC to a destination that
may write-fault, if it is a user page.

So copy_to_user_mcsafe() becomes copy_mc_to_user() to indicate
the separate precautions taken on source and destination.
copy_mc_to_kernel() is introduced as a non-SMAP version that does not
expect write-faults on the destination, but is still prepared to abort
with an error code upon taking #MC.

The original copy_mc_fragile() implementation had negative performance
implications since it did not use the fast-string instruction sequence
to perform copies. For this reason copy_mc_to_kernel() fell back to
plain memcpy() to preserve performance on platforms that did not indicate
the capability to recover from machine check exceptions. However, that
capability detection was not architectural and now that some platforms
can recover from fast-string consumption of memory errors the memcpy()
fallback now causes these more capable platforms to fail.

Introduce copy_mc_enhanced_fast_string() as the fast default
implementation of copy_mc_to_kernel() and finalize the transition of
copy_mc_fragile() to be a platform quirk to indicate 'copy-carefully'.
With this in place, copy_mc_to_kernel() is fast and recovery-ready by
default regardless of hardware capability.

Thanks to Vivek for identifying that copy_user_generic() is not suitable
as the copy_mc_to_user() backend since the #MC handler explicitly checks
ex_has_fault_handler(). Thanks to the 0day robot for catching a
performance bug in the x86/copy_mc_to_user implementation.

 [ bp: Add the "why" for this change from the 0/2th message, massage. ]

Fixes: 92b0729c34ca ("x86/mm, x86/mce: Add memcpy_mcsafe()")
Reported-by: Erwin Tsaur <erwin.tsaur@intel.com>
Reported-by: 0day robot <lkp@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Tested-by: Erwin Tsaur <erwin.tsaur@intel.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/160195562556.2163339.18063423034951948973.stgit@dwillia2-desk3.amr.corp.intel.com
---
 arch/x86/lib/copy_mc.c    | 32 +++++++++++++++++++++++---------
 arch/x86/lib/copy_mc_64.S | 36 ++++++++++++++++++++++++++++++++++++
 tools/objtool/check.c     |  1 +
 3 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
index 2633635530b7b4..c13e8c9ee926b7 100644
--- a/arch/x86/lib/copy_mc.c
+++ b/arch/x86/lib/copy_mc.c
@@ -45,6 +45,8 @@ void enable_copy_mc_fragile(void)
 #define copy_mc_fragile_enabled (0)
 #endif
 
+unsigned long copy_mc_enhanced_fast_string(void *dst, const void *src, unsigned len);
+
 /**
  * copy_mc_to_kernel - memory copy that handles source exceptions
  *
@@ -52,9 +54,11 @@ void enable_copy_mc_fragile(void)
  * @src:	source address
  * @len:	number of bytes to copy
  *
- * Call into the 'fragile' version on systems that have trouble
- * actually do machine check recovery. Everyone else can just
- * use memcpy().
+ * Call into the 'fragile' version on systems that benefit from avoiding
+ * corner case poison consumption scenarios, For example, accessing
+ * poison across 2 cachelines with a single instruction. Almost all
+ * other uses case can use copy_mc_enhanced_fast_string() for a fast
+ * recoverable copy, or fallback to plain memcpy.
  *
  * Return 0 for success, or number of bytes not copied if there was an
  * exception.
@@ -63,6 +67,8 @@ unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigne
 {
 	if (copy_mc_fragile_enabled)
 		return copy_mc_fragile(dst, src, len);
+	if (static_cpu_has(X86_FEATURE_ERMS))
+		return copy_mc_enhanced_fast_string(dst, src, len);
 	memcpy(dst, src, len);
 	return 0;
 }
@@ -72,11 +78,19 @@ unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned
 {
 	unsigned long ret;
 
-	if (!copy_mc_fragile_enabled)
-		return copy_user_generic(dst, src, len);
+	if (copy_mc_fragile_enabled) {
+		__uaccess_begin();
+		ret = copy_mc_fragile(dst, src, len);
+		__uaccess_end();
+		return ret;
+	}
+
+	if (static_cpu_has(X86_FEATURE_ERMS)) {
+		__uaccess_begin();
+		ret = copy_mc_enhanced_fast_string(dst, src, len);
+		__uaccess_end();
+		return ret;
+	}
 
-	__uaccess_begin();
-	ret = copy_mc_fragile(dst, src, len);
-	__uaccess_end();
-	return ret;
+	return copy_user_generic(dst, src, len);
 }
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
index c3b613c4544a37..892d8915f609e8 100644
--- a/arch/x86/lib/copy_mc_64.S
+++ b/arch/x86/lib/copy_mc_64.S
@@ -124,4 +124,40 @@ EXPORT_SYMBOL_GPL(copy_mc_fragile)
 	_ASM_EXTABLE(.L_write_words, .E_write_words)
 	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
 #endif /* CONFIG_X86_MCE */
+
+/*
+ * copy_mc_enhanced_fast_string - memory copy with exception handling
+ *
+ * Fast string copy + fault / exception handling. If the CPU does
+ * support machine check exception recovery, but does not support
+ * recovering from fast-string exceptions then this CPU needs to be
+ * added to the copy_mc_fragile_key set of quirks. Otherwise, absent any
+ * machine check recovery support this version should be no slower than
+ * standard memcpy.
+ */
+SYM_FUNC_START(copy_mc_enhanced_fast_string)
+	movq %rdi, %rax
+	movq %rdx, %rcx
+.L_copy:
+	rep movsb
+	/* Copy successful. Return zero */
+	xorl %eax, %eax
+	ret
+SYM_FUNC_END(copy_mc_enhanced_fast_string)
+
+	.section .fixup, "ax"
+.E_copy:
+	/*
+	 * On fault %rcx is updated such that the copy instruction could
+	 * optionally be restarted at the fault position, i.e. it
+	 * contains 'bytes remaining'. A non-zero return indicates error
+	 * to copy_mc_generic() users, or indicate short transfers to
+	 * user-copy routines.
+	 */
+	movq %rcx, %rax
+	ret
+
+	.previous
+
+	_ASM_EXTABLE_FAULT(.L_copy, .E_copy)
 #endif /* !CONFIG_UML */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 893f021fec639b..b3e4efcf7ca6c0 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -550,6 +550,7 @@ static const char *uaccess_safe_builtin[] = {
 	"csum_partial_copy_generic",
 	"copy_mc_fragile",
 	"copy_mc_fragile_handle_tail",
+	"copy_mc_enhanced_fast_string",
 	"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
 	NULL
 };

From 39297dde7390e01bfd737052fbb5313a09062e2d Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:17 -0500
Subject: [PATCH 238/262] x86/platform/uv: Remove UV BAU TLB Shootdown Handler

The Broadcast Assist Unit (BAU) TLB shootdown handler is being rewritten
to become the UV BAU APIC driver. It is designed to speed up sending
IPIs to selective CPUs within the system. Remove the current TLB
shutdown handler (tlb_uv.c) file and a couple of kernel hooks in the
interim.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-2-mike.travis@hpe.com
---
 arch/x86/include/asm/idtentry.h  |    4 -
 arch/x86/include/asm/uv/uv.h     |    4 +-
 arch/x86/include/asm/uv/uv_bau.h |  755 -----------
 arch/x86/kernel/idt.c            |    3 -
 arch/x86/mm/tlb.c                |   24 -
 arch/x86/platform/uv/Makefile    |    2 +-
 arch/x86/platform/uv/tlb_uv.c    | 2097 ------------------------------
 7 files changed, 2 insertions(+), 2887 deletions(-)
 delete mode 100644 arch/x86/include/asm/uv/uv_bau.h
 delete mode 100644 arch/x86/platform/uv/tlb_uv.c

diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index a0638640f1edae..df4dc975e8fd54 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -591,10 +591,6 @@ DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR,		sysvec_call_function);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-# ifdef CONFIG_X86_UV
-DECLARE_IDTENTRY_SYSVEC(UV_BAU_MESSAGE,			sysvec_uv_bau_message);
-# endif
-
 # ifdef CONFIG_X86_MCE_THRESHOLD
 DECLARE_IDTENTRY_SYSVEC(THRESHOLD_APIC_VECTOR,		sysvec_threshold);
 # endif
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index e48aea9ba47de1..172d3e4a9e4b85 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -35,10 +35,8 @@ extern int is_uv_hubbed(int uvtype);
 extern void uv_cpu_init(void);
 extern void uv_nmi_init(void);
 extern void uv_system_init(void);
-extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
-						 const struct flush_tlb_info *info);
 
-#else	/* X86_UV */
+#else	/* !X86_UV */
 
 static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
 static inline bool is_early_uv_system(void)	{ return 0; }
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
deleted file mode 100644
index cd24804955d701..00000000000000
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ /dev/null
@@ -1,755 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * SGI UV Broadcast Assist Unit definitions
- *
- * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved.
- */
-
-#ifndef _ASM_X86_UV_UV_BAU_H
-#define _ASM_X86_UV_UV_BAU_H
-
-#include <linux/bitmap.h>
-#include <asm/idtentry.h>
-
-#define BITSPERBYTE 8
-
-/*
- * Broadcast Assist Unit messaging structures
- *
- * Selective Broadcast activations are induced by software action
- * specifying a particular 8-descriptor "set" via a 6-bit index written
- * to an MMR.
- * Thus there are 64 unique 512-byte sets of SB descriptors - one set for
- * each 6-bit index value. These descriptor sets are mapped in sequence
- * starting with set 0 located at the address specified in the
- * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
- * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
- *
- * We will use one set for sending BAU messages from each of the
- * cpu's on the uvhub.
- *
- * TLB shootdown will use the first of the 8 descriptors of each set.
- * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
- */
-
-#define MAX_CPUS_PER_UVHUB		128
-#define MAX_CPUS_PER_SOCKET		64
-#define ADP_SZ				64 /* hardware-provided max. */
-#define UV_CPUS_PER_AS			32 /* hardware-provided max. */
-#define ITEMS_PER_DESC			8
-/* the 'throttle' to prevent the hardware stay-busy bug */
-#define MAX_BAU_CONCURRENT		3
-#define UV_ACT_STATUS_MASK		0x3
-#define UV_ACT_STATUS_SIZE		2
-#define UV_DISTRIBUTION_SIZE		256
-#define UV_SW_ACK_NPENDING		8
-#define UV_NET_ENDPOINT_INTD		0x28
-#define UV_PAYLOADQ_GNODE_SHIFT		49
-#define UV_PTC_BASENAME			"sgi_uv/ptc_statistics"
-#define UV_BAU_BASENAME			"sgi_uv/bau_tunables"
-#define UV_BAU_TUNABLES_DIR		"sgi_uv"
-#define UV_BAU_TUNABLES_FILE		"bau_tunables"
-#define WHITESPACE			" \t\n"
-#define cpubit_isset(cpu, bau_local_cpumask) \
-	test_bit((cpu), (bau_local_cpumask).bits)
-
-/* [19:16] SOFT_ACK timeout period  19: 1 is urgency 7  17:16 1 is multiplier */
-/*
- * UV2: Bit 19 selects between
- *  (0): 10 microsecond timebase and
- *  (1): 80 microseconds
- *  we're using 560us
- */
-#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD	(15UL)
-/* assuming UV3 is the same */
-
-#define BAU_MISC_CONTROL_MULT_MASK	3
-
-#define UVH_AGING_PRESCALE_SEL		0x000000b000UL
-/* [30:28] URGENCY_7  an index into a table of times */
-#define BAU_URGENCY_7_SHIFT		28
-#define BAU_URGENCY_7_MASK		7
-
-#define UVH_TRANSACTION_TIMEOUT		0x000000b200UL
-/* [45:40] BAU - BAU transaction timeout select - a multiplier */
-#define BAU_TRANS_SHIFT			40
-#define BAU_TRANS_MASK			0x3f
-
-/*
- * shorten some awkward names
- */
-#define AS_PUSH_SHIFT UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT
-#define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT
-#define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
-#define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD
-#define PREFETCH_HINT_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT
-#define SB_STATUS_SHFT UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
-#define write_gmmr	uv_write_global_mmr64
-#define write_lmmr	uv_write_local_mmr
-#define read_lmmr	uv_read_local_mmr
-#define read_gmmr	uv_read_global_mmr64
-
-/*
- * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
- */
-#define DS_IDLE				0
-#define DS_ACTIVE			1
-#define DS_DESTINATION_TIMEOUT		2
-#define DS_SOURCE_TIMEOUT		3
-/*
- * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2
- * values 1 and 3 will not occur
- *        Decoded meaning              ERROR  BUSY    AUX ERR
- * -------------------------------     ----   -----   -------
- * IDLE                                 0       0        0
- * BUSY (active)                        0       1        0
- * SW Ack Timeout (destination)         1       0        0
- * SW Ack INTD rejected (strong NACK)   1       0        1
- * Source Side Time Out Detected        1       1        0
- * Destination Side PUT Failed          1       1        1
- */
-#define UV2H_DESC_IDLE			0
-#define UV2H_DESC_BUSY			2
-#define UV2H_DESC_DEST_TIMEOUT		4
-#define UV2H_DESC_DEST_STRONG_NACK	5
-#define UV2H_DESC_SOURCE_TIMEOUT	6
-#define UV2H_DESC_DEST_PUT_ERR		7
-
-/*
- * delay for 'plugged' timeout retries, in microseconds
- */
-#define PLUGGED_DELAY			10
-
-/*
- * threshholds at which to use IPI to free resources
- */
-/* after this # consecutive 'plugged' timeouts, use IPI to release resources */
-#define PLUGSB4RESET			100
-/* after this many consecutive timeouts, use IPI to release resources */
-#define TIMEOUTSB4RESET			1
-/* at this number uses of IPI to release resources, giveup the request */
-#define IPI_RESET_LIMIT			1
-/* after this # consecutive successes, bump up the throttle if it was lowered */
-#define COMPLETE_THRESHOLD		5
-/* after this # of giveups (fall back to kernel IPI's) disable the use of
-   the BAU for a period of time */
-#define GIVEUP_LIMIT			100
-
-#define UV_LB_SUBNODEID			0x10
-
-#define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT
-#define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK
-/* 4 bits of software ack period */
-#define UV2_ACK_MASK			0x7UL
-#define UV2_ACK_UNITS_SHFT		3
-#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT
-
-/*
- * number of entries in the destination side payload queue
- */
-#define DEST_Q_SIZE			20
-/*
- * number of destination side software ack resources
- */
-#define DEST_NUM_RESOURCES		8
-/*
- * completion statuses for sending a TLB flush message
- */
-#define FLUSH_RETRY_PLUGGED		1
-#define FLUSH_RETRY_TIMEOUT		2
-#define FLUSH_GIVEUP			3
-#define FLUSH_COMPLETE			4
-
-/*
- * tuning the action when the numalink network is extremely delayed
- */
-#define CONGESTED_RESPONSE_US		1000	/* 'long' response time, in
-						   microseconds */
-#define CONGESTED_REPS			10	/* long delays averaged over
-						   this many broadcasts */
-#define DISABLED_PERIOD			10	/* time for the bau to be
-						   disabled, in seconds */
-/* see msg_type: */
-#define MSG_NOOP			0
-#define MSG_REGULAR			1
-#define MSG_RETRY			2
-
-#define BAU_DESC_QUALIFIER		0x534749
-
-enum uv_bau_version {
-	UV_BAU_V2 = 2,
-	UV_BAU_V3,
-	UV_BAU_V4,
-};
-
-/*
- * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
- * If the 'multilevel' flag in the header portion of the descriptor
- * has been set to 0, then endpoint multi-unicast mode is selected.
- * The distribution specification (32 bytes) is interpreted as a 256-bit
- * distribution vector. Adjacent bits correspond to consecutive even numbered
- * nodeIDs. The result of adding the index of a given bit to the 15-bit
- * 'base_dest_nasid' field of the header corresponds to the
- * destination nodeID associated with that specified bit.
- */
-struct pnmask {
-	unsigned long		bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)];
-};
-
-/*
- * mask of cpu's on a uvhub
- * (during initialization we need to check that unsigned long has
- *  enough bits for max. cpu's per uvhub)
- */
-struct bau_local_cpumask {
-	unsigned long		bits;
-};
-
-/*
- * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor)
- * only 12 bytes (96 bits) of the payload area are usable.
- * An additional 3 bytes (bits 27:4) of the header address are carried
- * to the next bytes of the destination payload queue.
- * And an additional 2 bytes of the header Suppl_A field are also
- * carried to the destination payload queue.
- * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte)
- * of the destination payload queue, which is written by the hardware
- * with the s/w ack resource bit vector.
- * [ effective message contents (16 bytes (128 bits) maximum), not counting
- *   the s/w ack bit vector  ]
- */
-
-/**
- * struct uv2_3_bau_msg_payload - defines payload for INTD transactions
- * @address:		Signifies a page or all TLB's of the cpu
- * @sending_cpu:	CPU from which the message originates
- * @acknowledge_count:	CPUs on the destination Hub that received the interrupt
- */
-struct uv2_3_bau_msg_payload {
-	u64 address;
-	u16 sending_cpu;
-	u16 acknowledge_count;
-};
-
-/**
- * struct uv4_bau_msg_payload - defines payload for INTD transactions
- * @address:		Signifies a page or all TLB's of the cpu
- * @sending_cpu:	CPU from which the message originates
- * @acknowledge_count:	CPUs on the destination Hub that received the interrupt
- * @qualifier:		Set by source to verify origin of INTD broadcast
- */
-struct uv4_bau_msg_payload {
-	u64 address;
-	u16 sending_cpu;
-	u16 acknowledge_count;
-	u32 reserved:8;
-	u32 qualifier:24;
-};
-
-/*
- * UV2 Message header:  16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
- * see figure 9-2 of harp_sys.pdf
- * assuming UV3 is the same
- */
-struct uv2_3_bau_msg_header {
-	unsigned int	base_dest_nasid:15;	/* nasid of the first bit */
-	/* bits 14:0 */				/* in uvhub map */
-	unsigned int	dest_subnodeid:5;	/* must be 0x10, for the LB */
-	/* bits 19:15 */
-	unsigned int	rsvd_1:1;		/* must be zero */
-	/* bit 20 */
-	/* Address bits 59:21 */
-	/* bits 25:2 of address (44:21) are payload */
-	/* these next 24 bits become bytes 12-14 of msg */
-	/* bits 28:21 land in byte 12 */
-	unsigned int	replied_to:1;		/* sent as 0 by the source to
-						   byte 12 */
-	/* bit 21 */
-	unsigned int	msg_type:3;		/* software type of the
-						   message */
-	/* bits 24:22 */
-	unsigned int	canceled:1;		/* message canceled, resource
-						   is to be freed*/
-	/* bit 25 */
-	unsigned int	payload_1:3;		/* not currently used */
-	/* bits 28:26 */
-
-	/* bits 36:29 land in byte 13 */
-	unsigned int	payload_2a:3;		/* not currently used */
-	unsigned int	payload_2b:5;		/* not currently used */
-	/* bits 36:29 */
-
-	/* bits 44:37 land in byte 14 */
-	unsigned int	payload_3:8;		/* not currently used */
-	/* bits 44:37 */
-
-	unsigned int	rsvd_2:7;		/* reserved */
-	/* bits 51:45 */
-	unsigned int	swack_flag:1;		/* software acknowledge flag */
-	/* bit 52 */
-	unsigned int	rsvd_3a:3;		/* must be zero */
-	unsigned int	rsvd_3b:8;		/* must be zero */
-	unsigned int	rsvd_3c:8;		/* must be zero */
-	unsigned int	rsvd_3d:3;		/* must be zero */
-	/* bits 74:53 */
-	unsigned int	fairness:3;		/* usually zero */
-	/* bits 77:75 */
-
-	unsigned int	sequence:16;		/* message sequence number */
-	/* bits 93:78  Suppl_A  */
-	unsigned int	chaining:1;		/* next descriptor is part of
-						   this activation*/
-	/* bit 94 */
-	unsigned int	multilevel:1;		/* multi-level multicast
-						   format */
-	/* bit 95 */
-	unsigned int	rsvd_4:24;		/* ordered / source node /
-						   source subnode / aging
-						   must be zero */
-	/* bits 119:96 */
-	unsigned int	command:8;		/* message type */
-	/* bits 127:120 */
-};
-
-/*
- * The activation descriptor:
- * The format of the message to send, plus all accompanying control
- * Should be 64 bytes
- */
-struct bau_desc {
-	struct pnmask				distribution;
-	/*
-	 * message template, consisting of header and payload:
-	 */
-	union bau_msg_header {
-		struct uv2_3_bau_msg_header	uv2_3_hdr;
-	} header;
-
-	union bau_payload_header {
-		struct uv2_3_bau_msg_payload	uv2_3;
-		struct uv4_bau_msg_payload	uv4;
-	} payload;
-};
-/* UV2:
- *   -payload--    ---------header------
- *   bytes 0-11    bits 70-78  bits 21-44
- *       A           B  (2)      C (3)
- *
- *            A/B/C are moved to:
- *       A            C          B
- *   bytes 0-11  bytes 12-14  bytes 16-17  (byte 15 filled in by hw as vector)
- *   ------------payload queue-----------
- */
-
-/*
- * The payload queue on the destination side is an array of these.
- * With BAU_MISC_CONTROL set for software acknowledge mode, the messages
- * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17
- * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120)
- * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from
- *  swack_vec and payload_2)
- * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software
- *  Acknowledge Processing) also selects 32 byte (17 bytes usable) payload
- *  operation."
- */
-struct bau_pq_entry {
-	unsigned long	address;	/* signifies a page or all TLB's
-					   of the cpu */
-	/* 64 bits, bytes 0-7 */
-	unsigned short	sending_cpu;	/* cpu that sent the message */
-	/* 16 bits, bytes 8-9 */
-	unsigned short	acknowledge_count; /* filled in by destination */
-	/* 16 bits, bytes 10-11 */
-	/* these next 3 bytes come from bits 58-81 of the message header */
-	unsigned short	replied_to:1;	/* sent as 0 by the source */
-	unsigned short	msg_type:3;	/* software message type */
-	unsigned short	canceled:1;	/* sent as 0 by the source */
-	unsigned short	unused1:3;	/* not currently using */
-	/* byte 12 */
-	unsigned char	unused2a;	/* not currently using */
-	/* byte 13 */
-	unsigned char	unused2;	/* not currently using */
-	/* byte 14 */
-	unsigned char	swack_vec;	/* filled in by the hardware */
-	/* byte 15 (bits 127:120) */
-	unsigned short	sequence;	/* message sequence number */
-	/* bytes 16-17 */
-	unsigned char	unused4[2];	/* not currently using bytes 18-19 */
-	/* bytes 18-19 */
-	int		number_of_cpus;	/* filled in at destination */
-	/* 32 bits, bytes 20-23 (aligned) */
-	unsigned char	unused5[8];	/* not using */
-	/* bytes 24-31 */
-};
-
-struct msg_desc {
-	struct bau_pq_entry	*msg;
-	int			msg_slot;
-	struct bau_pq_entry	*queue_first;
-	struct bau_pq_entry	*queue_last;
-};
-
-struct reset_args {
-	int			sender;
-};
-
-/*
- * This structure is allocated per_cpu for UV TLB shootdown statistics.
- */
-struct ptc_stats {
-	/* sender statistics */
-	unsigned long	s_giveup;		/* number of fall backs to
-						   IPI-style flushes */
-	unsigned long	s_requestor;		/* number of shootdown
-						   requests */
-	unsigned long	s_stimeout;		/* source side timeouts */
-	unsigned long	s_dtimeout;		/* destination side timeouts */
-	unsigned long	s_strongnacks;		/* number of strong nack's */
-	unsigned long	s_time;			/* time spent in sending side */
-	unsigned long	s_retriesok;		/* successful retries */
-	unsigned long	s_ntargcpu;		/* total number of cpu's
-						   targeted */
-	unsigned long	s_ntargself;		/* times the sending cpu was
-						   targeted */
-	unsigned long	s_ntarglocals;		/* targets of cpus on the local
-						   blade */
-	unsigned long	s_ntargremotes;		/* targets of cpus on remote
-						   blades */
-	unsigned long	s_ntarglocaluvhub;	/* targets of the local hub */
-	unsigned long	s_ntargremoteuvhub;	/* remotes hubs targeted */
-	unsigned long	s_ntarguvhub;		/* total number of uvhubs
-						   targeted */
-	unsigned long	s_ntarguvhub16;		/* number of times target
-						   hubs >= 16*/
-	unsigned long	s_ntarguvhub8;		/* number of times target
-						   hubs >= 8 */
-	unsigned long	s_ntarguvhub4;		/* number of times target
-						   hubs >= 4 */
-	unsigned long	s_ntarguvhub2;		/* number of times target
-						   hubs >= 2 */
-	unsigned long	s_ntarguvhub1;		/* number of times target
-						   hubs == 1 */
-	unsigned long	s_resets_plug;		/* ipi-style resets from plug
-						   state */
-	unsigned long	s_resets_timeout;	/* ipi-style resets from
-						   timeouts */
-	unsigned long	s_busy;			/* status stayed busy past
-						   s/w timer */
-	unsigned long	s_throttles;		/* waits in throttle */
-	unsigned long	s_retry_messages;	/* retry broadcasts */
-	unsigned long	s_bau_reenabled;	/* for bau enable/disable */
-	unsigned long	s_bau_disabled;		/* for bau enable/disable */
-	unsigned long	s_uv2_wars;		/* uv2 workaround, perm. busy */
-	unsigned long	s_uv2_wars_hw;		/* uv2 workaround, hiwater */
-	unsigned long	s_uv2_war_waits;	/* uv2 workaround, long waits */
-	unsigned long	s_overipilimit;		/* over the ipi reset limit */
-	unsigned long	s_giveuplimit;		/* disables, over giveup limit*/
-	unsigned long	s_enters;		/* entries to the driver */
-	unsigned long	s_ipifordisabled;	/* fall back to IPI; disabled */
-	unsigned long	s_plugged;		/* plugged by h/w bug*/
-	unsigned long	s_congested;		/* giveup on long wait */
-	/* destination statistics */
-	unsigned long	d_alltlb;		/* times all tlb's on this
-						   cpu were flushed */
-	unsigned long	d_onetlb;		/* times just one tlb on this
-						   cpu was flushed */
-	unsigned long	d_multmsg;		/* interrupts with multiple
-						   messages */
-	unsigned long	d_nomsg;		/* interrupts with no message */
-	unsigned long	d_time;			/* time spent on destination
-						   side */
-	unsigned long	d_requestee;		/* number of messages
-						   processed */
-	unsigned long	d_retries;		/* number of retry messages
-						   processed */
-	unsigned long	d_canceled;		/* number of messages canceled
-						   by retries */
-	unsigned long	d_nocanceled;		/* retries that found nothing
-						   to cancel */
-	unsigned long	d_resets;		/* number of ipi-style requests
-						   processed */
-	unsigned long	d_rcanceled;		/* number of messages canceled
-						   by resets */
-};
-
-struct tunables {
-	int			*tunp;
-	int			deflt;
-};
-
-struct hub_and_pnode {
-	short			uvhub;
-	short			pnode;
-};
-
-struct socket_desc {
-	short			num_cpus;
-	short			cpu_number[MAX_CPUS_PER_SOCKET];
-};
-
-struct uvhub_desc {
-	unsigned short		socket_mask;
-	short			num_cpus;
-	short			uvhub;
-	short			pnode;
-	struct socket_desc	socket[2];
-};
-
-/**
- * struct bau_control
- * @status_mmr: location of status mmr, determined by uvhub_cpu
- * @status_index: index of ERR|BUSY bits in status mmr, determined by uvhub_cpu
- *
- * Per-cpu control struct containing CPU topology information and BAU tuneables.
- */
-struct bau_control {
-	struct bau_desc		*descriptor_base;
-	struct bau_pq_entry	*queue_first;
-	struct bau_pq_entry	*queue_last;
-	struct bau_pq_entry	*bau_msg_head;
-	struct bau_control	*uvhub_master;
-	struct bau_control	*socket_master;
-	struct ptc_stats	*statp;
-	cpumask_t		*cpumask;
-	unsigned long		timeout_interval;
-	unsigned long		set_bau_on_time;
-	atomic_t		active_descriptor_count;
-	int			plugged_tries;
-	int			timeout_tries;
-	int			ipi_attempts;
-	int			conseccompletes;
-	u64			status_mmr;
-	int			status_index;
-	bool			nobau;
-	short			baudisabled;
-	short			cpu;
-	short			osnode;
-	short			uvhub_cpu;
-	short			uvhub;
-	short			uvhub_version;
-	short			cpus_in_socket;
-	short			cpus_in_uvhub;
-	short			partition_base_pnode;
-	short			busy;       /* all were busy (war) */
-	unsigned short		message_number;
-	unsigned short		uvhub_quiesce;
-	short			socket_acknowledge_count[DEST_Q_SIZE];
-	cycles_t		send_message;
-	cycles_t		period_end;
-	cycles_t		period_time;
-	spinlock_t		uvhub_lock;
-	spinlock_t		queue_lock;
-	spinlock_t		disable_lock;
-	/* tunables */
-	int			max_concurr;
-	int			max_concurr_const;
-	int			plugged_delay;
-	int			plugsb4reset;
-	int			timeoutsb4reset;
-	int			ipi_reset_limit;
-	int			complete_threshold;
-	int			cong_response_us;
-	int			cong_reps;
-	cycles_t		disabled_period;
-	int			period_giveups;
-	int			giveup_limit;
-	long			period_requests;
-	struct hub_and_pnode	*thp;
-};
-
-/* Abstracted BAU functions */
-struct bau_operations {
-	unsigned long	(*read_l_sw_ack)(void);
-	unsigned long	(*read_g_sw_ack)(int pnode);
-	unsigned long	(*bau_gpa_to_offset)(unsigned long vaddr);
-	void		(*write_l_sw_ack)(unsigned long mmr);
-	void		(*write_g_sw_ack)(int pnode, unsigned long mmr);
-	void		(*write_payload_first)(int pnode, unsigned long mmr);
-	void		(*write_payload_last)(int pnode, unsigned long mmr);
-	int		(*wait_completion)(struct bau_desc*,
-				struct bau_control*, long try);
-};
-
-static inline void write_mmr_data_broadcast(int pnode, unsigned long mmr_image)
-{
-	write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image);
-}
-
-static inline void write_mmr_descriptor_base(int pnode, unsigned long mmr_image)
-{
-	write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image);
-}
-
-static inline void write_mmr_activation(unsigned long index)
-{
-	write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
-}
-
-static inline void write_gmmr_activation(int pnode, unsigned long mmr_image)
-{
-	write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image);
-}
-
-static inline void write_mmr_proc_payload_first(int pnode, unsigned long mmr_image)
-{
-	write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_FIRST, mmr_image);
-}
-
-static inline void write_mmr_proc_payload_last(int pnode, unsigned long mmr_image)
-{
-	write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_LAST, mmr_image);
-}
-
-static inline void write_mmr_payload_first(int pnode, unsigned long mmr_image)
-{
-	write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image);
-}
-
-static inline void write_mmr_payload_tail(int pnode, unsigned long mmr_image)
-{
-	write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image);
-}
-
-static inline void write_mmr_payload_last(int pnode, unsigned long mmr_image)
-{
-	write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image);
-}
-
-static inline void write_mmr_misc_control(int pnode, unsigned long mmr_image)
-{
-	write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
-}
-
-static inline unsigned long read_mmr_misc_control(int pnode)
-{
-	return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL);
-}
-
-static inline void write_mmr_sw_ack(unsigned long mr)
-{
-	uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
-}
-
-static inline void write_gmmr_sw_ack(int pnode, unsigned long mr)
-{
-	write_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr);
-}
-
-static inline unsigned long read_mmr_sw_ack(void)
-{
-	return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
-}
-
-static inline unsigned long read_gmmr_sw_ack(int pnode)
-{
-	return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
-}
-
-static inline void write_mmr_proc_sw_ack(unsigned long mr)
-{
-	uv_write_local_mmr(UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr);
-}
-
-static inline void write_gmmr_proc_sw_ack(int pnode, unsigned long mr)
-{
-	write_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr);
-}
-
-static inline unsigned long read_mmr_proc_sw_ack(void)
-{
-	return read_lmmr(UV4H_LB_PROC_INTD_SOFT_ACK_PENDING);
-}
-
-static inline unsigned long read_gmmr_proc_sw_ack(int pnode)
-{
-	return read_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_PENDING);
-}
-
-static inline void write_mmr_data_config(int pnode, unsigned long mr)
-{
-	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr);
-}
-
-static inline int bau_uvhub_isset(int uvhub, struct pnmask *dstp)
-{
-	return constant_test_bit(uvhub, &dstp->bits[0]);
-}
-static inline void bau_uvhub_set(int pnode, struct pnmask *dstp)
-{
-	__set_bit(pnode, &dstp->bits[0]);
-}
-static inline void bau_uvhubs_clear(struct pnmask *dstp,
-				    int nbits)
-{
-	bitmap_zero(&dstp->bits[0], nbits);
-}
-static inline int bau_uvhub_weight(struct pnmask *dstp)
-{
-	return bitmap_weight((unsigned long *)&dstp->bits[0],
-				UV_DISTRIBUTION_SIZE);
-}
-
-static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
-{
-	bitmap_zero(&dstp->bits, nbits);
-}
-
-struct atomic_short {
-	short counter;
-};
-
-/*
- * atomic_read_short - read a short atomic variable
- * @v: pointer of type atomic_short
- *
- * Atomically reads the value of @v.
- */
-static inline int atomic_read_short(const struct atomic_short *v)
-{
-	return v->counter;
-}
-
-/*
- * atom_asr - add and return a short int
- * @i: short value to add
- * @v: pointer of type atomic_short
- *
- * Atomically adds @i to @v and returns @i + @v
- */
-static inline int atom_asr(short i, struct atomic_short *v)
-{
-	short __i = i;
-	asm volatile(LOCK_PREFIX "xaddw %0, %1"
-			: "+r" (i), "+m" (v->counter)
-			: : "memory");
-	return i + __i;
-}
-
-/*
- * conditionally add 1 to *v, unless *v is >= u
- * return 0 if we cannot add 1 to *v because it is >= u
- * return 1 if we can add 1 to *v because it is < u
- * the add is atomic
- *
- * This is close to atomic_add_unless(), but this allows the 'u' value
- * to be lowered below the current 'v'.  atomic_add_unless can only stop
- * on equal.
- */
-static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
-{
-	spin_lock(lock);
-	if (atomic_read(v) >= u) {
-		spin_unlock(lock);
-		return 0;
-	}
-	atomic_inc(v);
-	spin_unlock(lock);
-	return 1;
-}
-
-void uv_bau_message_interrupt(struct pt_regs *regs);
-
-#endif /* _ASM_X86_UV_UV_BAU_H */
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 7ecf9babf0cb63..1bffb87dcfdc4f 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -148,9 +148,6 @@ static const __initconst struct idt_data apic_idts[] = {
 # endif
 # ifdef CONFIG_IRQ_WORK
 	INTG(IRQ_WORK_VECTOR,			asm_sysvec_irq_work),
-# endif
-# ifdef CONFIG_X86_UV
-	INTG(UV_BAU_MESSAGE,			asm_sysvec_uv_bau_message),
 # endif
 	INTG(SPURIOUS_APIC_VECTOR,		asm_sysvec_spurious_apic_interrupt),
 	INTG(ERROR_APIC_VECTOR,			asm_sysvec_error_interrupt),
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 0951b47e64c10b..11666ba19b623e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -14,7 +14,6 @@
 #include <asm/nospec-branch.h>
 #include <asm/cache.h>
 #include <asm/apic.h>
-#include <asm/uv/uv.h>
 
 #include "mm_internal.h"
 
@@ -800,29 +799,6 @@ STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask,
 		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
 				(info->end - info->start) >> PAGE_SHIFT);
 
-	if (is_uv_system()) {
-		/*
-		 * This whole special case is confused.  UV has a "Broadcast
-		 * Assist Unit", which seems to be a fancy way to send IPIs.
-		 * Back when x86 used an explicit TLB flush IPI, UV was
-		 * optimized to use its own mechanism.  These days, x86 uses
-		 * smp_call_function_many(), but UV still uses a manual IPI,
-		 * and that IPI's action is out of date -- it does a manual
-		 * flush instead of calling flush_tlb_func_remote().  This
-		 * means that the percpu tlb_gen variables won't be updated
-		 * and we'll do pointless flushes on future context switches.
-		 *
-		 * Rather than hooking native_flush_tlb_others() here, I think
-		 * that UV should be updated so that smp_call_function_many(),
-		 * etc, are optimal on UV.
-		 */
-		cpumask = uv_flush_tlb_others(cpumask, info);
-		if (cpumask)
-			smp_call_function_many(cpumask, flush_tlb_func_remote,
-					       (void *)info, 1);
-		return;
-	}
-
 	/*
 	 * If no page tables were freed, we can skip sending IPIs to
 	 * CPUs in lazy TLB mode. They will flush the CPU themselves
diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile
index a3693c829e2ea8..224ff0504890ec 100644
--- a/arch/x86/platform/uv/Makefile
+++ b/arch/x86/platform/uv/Makefile
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_X86_UV)		+= tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o uv_nmi.o
+obj-$(CONFIG_X86_UV)		+= bios_uv.o uv_irq.o uv_sysfs.o uv_time.o uv_nmi.o
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
deleted file mode 100644
index 62ea907668f83c..00000000000000
--- a/arch/x86/platform/uv/tlb_uv.c
+++ /dev/null
@@ -1,2097 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *	SGI UltraViolet TLB flush routines.
- *
- *	(c) 2008-2014 Cliff Wickman <cpw@sgi.com>, SGI.
- */
-#include <linux/seq_file.h>
-#include <linux/proc_fs.h>
-#include <linux/debugfs.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-
-#include <asm/mmu_context.h>
-#include <asm/uv/uv.h>
-#include <asm/uv/uv_mmrs.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_bau.h>
-#include <asm/apic.h>
-#include <asm/tsc.h>
-#include <asm/irq_vectors.h>
-#include <asm/timer.h>
-
-static struct bau_operations ops __ro_after_init;
-
-static int timeout_us;
-static bool nobau = true;
-static int nobau_perm;
-
-/* tunables: */
-static int max_concurr		= MAX_BAU_CONCURRENT;
-static int max_concurr_const	= MAX_BAU_CONCURRENT;
-static int plugged_delay	= PLUGGED_DELAY;
-static int plugsb4reset		= PLUGSB4RESET;
-static int giveup_limit		= GIVEUP_LIMIT;
-static int timeoutsb4reset	= TIMEOUTSB4RESET;
-static int ipi_reset_limit	= IPI_RESET_LIMIT;
-static int complete_threshold	= COMPLETE_THRESHOLD;
-static int congested_respns_us	= CONGESTED_RESPONSE_US;
-static int congested_reps	= CONGESTED_REPS;
-static int disabled_period	= DISABLED_PERIOD;
-
-static struct tunables tunables[] = {
-	{&max_concurr,           MAX_BAU_CONCURRENT}, /* must be [0] */
-	{&plugged_delay,         PLUGGED_DELAY},
-	{&plugsb4reset,          PLUGSB4RESET},
-	{&timeoutsb4reset,       TIMEOUTSB4RESET},
-	{&ipi_reset_limit,       IPI_RESET_LIMIT},
-	{&complete_threshold,    COMPLETE_THRESHOLD},
-	{&congested_respns_us,   CONGESTED_RESPONSE_US},
-	{&congested_reps,        CONGESTED_REPS},
-	{&disabled_period,       DISABLED_PERIOD},
-	{&giveup_limit,          GIVEUP_LIMIT}
-};
-
-static struct dentry *tunables_dir;
-
-/* these correspond to the statistics printed by ptc_seq_show() */
-static char *stat_description[] = {
-	"sent:     number of shootdown messages sent",
-	"stime:    time spent sending messages",
-	"numuvhubs: number of hubs targeted with shootdown",
-	"numuvhubs16: number times 16 or more hubs targeted",
-	"numuvhubs8: number times 8 or more hubs targeted",
-	"numuvhubs4: number times 4 or more hubs targeted",
-	"numuvhubs2: number times 2 or more hubs targeted",
-	"numuvhubs1: number times 1 hub targeted",
-	"numcpus:  number of cpus targeted with shootdown",
-	"dto:      number of destination timeouts",
-	"retries:  destination timeout retries sent",
-	"rok:   :  destination timeouts successfully retried",
-	"resetp:   ipi-style resource resets for plugs",
-	"resett:   ipi-style resource resets for timeouts",
-	"giveup:   fall-backs to ipi-style shootdowns",
-	"sto:      number of source timeouts",
-	"bz:       number of stay-busy's",
-	"throt:    number times spun in throttle",
-	"swack:   image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
-	"recv:     shootdown messages received",
-	"rtime:    time spent processing messages",
-	"all:      shootdown all-tlb messages",
-	"one:      shootdown one-tlb messages",
-	"mult:     interrupts that found multiple messages",
-	"none:     interrupts that found no messages",
-	"retry:    number of retry messages processed",
-	"canc:     number messages canceled by retries",
-	"nocan:    number retries that found nothing to cancel",
-	"reset:    number of ipi-style reset requests processed",
-	"rcan:     number messages canceled by reset requests",
-	"disable:  number times use of the BAU was disabled",
-	"enable:   number times use of the BAU was re-enabled"
-};
-
-static int __init setup_bau(char *arg)
-{
-	int result;
-
-	if (!arg)
-		return -EINVAL;
-
-	result = strtobool(arg, &nobau);
-	if (result)
-		return result;
-
-	/* we need to flip the logic here, so that bau=y sets nobau to false */
-	nobau = !nobau;
-
-	if (!nobau)
-		pr_info("UV BAU Enabled\n");
-	else
-		pr_info("UV BAU Disabled\n");
-
-	return 0;
-}
-early_param("bau", setup_bau);
-
-/* base pnode in this partition */
-static int uv_base_pnode __read_mostly;
-
-static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
-static DEFINE_PER_CPU(struct bau_control, bau_control);
-static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
-
-static void
-set_bau_on(void)
-{
-	int cpu;
-	struct bau_control *bcp;
-
-	if (nobau_perm) {
-		pr_info("BAU not initialized; cannot be turned on\n");
-		return;
-	}
-	nobau = false;
-	for_each_present_cpu(cpu) {
-		bcp = &per_cpu(bau_control, cpu);
-		bcp->nobau = false;
-	}
-	pr_info("BAU turned on\n");
-	return;
-}
-
-static void
-set_bau_off(void)
-{
-	int cpu;
-	struct bau_control *bcp;
-
-	nobau = true;
-	for_each_present_cpu(cpu) {
-		bcp = &per_cpu(bau_control, cpu);
-		bcp->nobau = true;
-	}
-	pr_info("BAU turned off\n");
-	return;
-}
-
-/*
- * Determine the first node on a uvhub. 'Nodes' are used for kernel
- * memory allocation.
- */
-static int __init uvhub_to_first_node(int uvhub)
-{
-	int node, b;
-
-	for_each_online_node(node) {
-		b = uv_node_to_blade_id(node);
-		if (uvhub == b)
-			return node;
-	}
-	return -1;
-}
-
-/*
- * Determine the apicid of the first cpu on a uvhub.
- */
-static int __init uvhub_to_first_apicid(int uvhub)
-{
-	int cpu;
-
-	for_each_present_cpu(cpu)
-		if (uvhub == uv_cpu_to_blade_id(cpu))
-			return per_cpu(x86_cpu_to_apicid, cpu);
-	return -1;
-}
-
-/*
- * Free a software acknowledge hardware resource by clearing its Pending
- * bit. This will return a reply to the sender.
- * If the message has timed out, a reply has already been sent by the
- * hardware but the resource has not been released. In that case our
- * clear of the Timeout bit (as well) will free the resource. No reply will
- * be sent (the hardware will only do one reply per message).
- */
-static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp,
-						int do_acknowledge)
-{
-	unsigned long dw;
-	struct bau_pq_entry *msg;
-
-	msg = mdp->msg;
-	if (!msg->canceled && do_acknowledge) {
-		dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
-		ops.write_l_sw_ack(dw);
-	}
-	msg->replied_to = 1;
-	msg->swack_vec = 0;
-}
-
-/*
- * Process the receipt of a RETRY message
- */
-static void bau_process_retry_msg(struct msg_desc *mdp,
-					struct bau_control *bcp)
-{
-	int i;
-	int cancel_count = 0;
-	unsigned long msg_res;
-	unsigned long mmr = 0;
-	struct bau_pq_entry *msg = mdp->msg;
-	struct bau_pq_entry *msg2;
-	struct ptc_stats *stat = bcp->statp;
-
-	stat->d_retries++;
-	/*
-	 * cancel any message from msg+1 to the retry itself
-	 */
-	for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
-		if (msg2 > mdp->queue_last)
-			msg2 = mdp->queue_first;
-		if (msg2 == msg)
-			break;
-
-		/* same conditions for cancellation as do_reset */
-		if ((msg2->replied_to == 0) && (msg2->canceled == 0) &&
-		    (msg2->swack_vec) && ((msg2->swack_vec &
-			msg->swack_vec) == 0) &&
-		    (msg2->sending_cpu == msg->sending_cpu) &&
-		    (msg2->msg_type != MSG_NOOP)) {
-			mmr = ops.read_l_sw_ack();
-			msg_res = msg2->swack_vec;
-			/*
-			 * This is a message retry; clear the resources held
-			 * by the previous message only if they timed out.
-			 * If it has not timed out we have an unexpected
-			 * situation to report.
-			 */
-			if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
-				unsigned long mr;
-				/*
-				 * Is the resource timed out?
-				 * Make everyone ignore the cancelled message.
-				 */
-				msg2->canceled = 1;
-				stat->d_canceled++;
-				cancel_count++;
-				mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
-				ops.write_l_sw_ack(mr);
-			}
-		}
-	}
-	if (!cancel_count)
-		stat->d_nocanceled++;
-}
-
-/*
- * Do all the things a cpu should do for a TLB shootdown message.
- * Other cpu's may come here at the same time for this message.
- */
-static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
-						int do_acknowledge)
-{
-	short socket_ack_count = 0;
-	short *sp;
-	struct atomic_short *asp;
-	struct ptc_stats *stat = bcp->statp;
-	struct bau_pq_entry *msg = mdp->msg;
-	struct bau_control *smaster = bcp->socket_master;
-
-	/*
-	 * This must be a normal message, or retry of a normal message
-	 */
-	if (msg->address == TLB_FLUSH_ALL) {
-		flush_tlb_local();
-		stat->d_alltlb++;
-	} else {
-		flush_tlb_one_user(msg->address);
-		stat->d_onetlb++;
-	}
-	stat->d_requestee++;
-
-	/*
-	 * One cpu on each uvhub has the additional job on a RETRY
-	 * of releasing the resource held by the message that is
-	 * being retried.  That message is identified by sending
-	 * cpu number.
-	 */
-	if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master)
-		bau_process_retry_msg(mdp, bcp);
-
-	/*
-	 * This is a swack message, so we have to reply to it.
-	 * Count each responding cpu on the socket. This avoids
-	 * pinging the count's cache line back and forth between
-	 * the sockets.
-	 */
-	sp = &smaster->socket_acknowledge_count[mdp->msg_slot];
-	asp = (struct atomic_short *)sp;
-	socket_ack_count = atom_asr(1, asp);
-	if (socket_ack_count == bcp->cpus_in_socket) {
-		int msg_ack_count;
-		/*
-		 * Both sockets dump their completed count total into
-		 * the message's count.
-		 */
-		*sp = 0;
-		asp = (struct atomic_short *)&msg->acknowledge_count;
-		msg_ack_count = atom_asr(socket_ack_count, asp);
-
-		if (msg_ack_count == bcp->cpus_in_uvhub) {
-			/*
-			 * All cpus in uvhub saw it; reply
-			 * (unless we are in the UV2 workaround)
-			 */
-			reply_to_message(mdp, bcp, do_acknowledge);
-		}
-	}
-
-	return;
-}
-
-/*
- * Determine the first cpu on a pnode.
- */
-static int pnode_to_first_cpu(int pnode, struct bau_control *smaster)
-{
-	int cpu;
-	struct hub_and_pnode *hpp;
-
-	for_each_present_cpu(cpu) {
-		hpp = &smaster->thp[cpu];
-		if (pnode == hpp->pnode)
-			return cpu;
-	}
-	return -1;
-}
-
-/*
- * Last resort when we get a large number of destination timeouts is
- * to clear resources held by a given cpu.
- * Do this with IPI so that all messages in the BAU message queue
- * can be identified by their nonzero swack_vec field.
- *
- * This is entered for a single cpu on the uvhub.
- * The sender want's this uvhub to free a specific message's
- * swack resources.
- */
-static void do_reset(void *ptr)
-{
-	int i;
-	struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id());
-	struct reset_args *rap = (struct reset_args *)ptr;
-	struct bau_pq_entry *msg;
-	struct ptc_stats *stat = bcp->statp;
-
-	stat->d_resets++;
-	/*
-	 * We're looking for the given sender, and
-	 * will free its swack resource.
-	 * If all cpu's finally responded after the timeout, its
-	 * message 'replied_to' was set.
-	 */
-	for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
-		unsigned long msg_res;
-		/* do_reset: same conditions for cancellation as
-		   bau_process_retry_msg() */
-		if ((msg->replied_to == 0) &&
-		    (msg->canceled == 0) &&
-		    (msg->sending_cpu == rap->sender) &&
-		    (msg->swack_vec) &&
-		    (msg->msg_type != MSG_NOOP)) {
-			unsigned long mmr;
-			unsigned long mr;
-			/*
-			 * make everyone else ignore this message
-			 */
-			msg->canceled = 1;
-			/*
-			 * only reset the resource if it is still pending
-			 */
-			mmr = ops.read_l_sw_ack();
-			msg_res = msg->swack_vec;
-			mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
-			if (mmr & msg_res) {
-				stat->d_rcanceled++;
-				ops.write_l_sw_ack(mr);
-			}
-		}
-	}
-	return;
-}
-
-/*
- * Use IPI to get all target uvhubs to release resources held by
- * a given sending cpu number.
- */
-static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
-{
-	int pnode;
-	int apnode;
-	int maskbits;
-	int sender = bcp->cpu;
-	cpumask_t *mask = bcp->uvhub_master->cpumask;
-	struct bau_control *smaster = bcp->socket_master;
-	struct reset_args reset_args;
-
-	reset_args.sender = sender;
-	cpumask_clear(mask);
-	/* find a single cpu for each uvhub in this distribution mask */
-	maskbits = sizeof(struct pnmask) * BITSPERBYTE;
-	/* each bit is a pnode relative to the partition base pnode */
-	for (pnode = 0; pnode < maskbits; pnode++) {
-		int cpu;
-		if (!bau_uvhub_isset(pnode, distribution))
-			continue;
-		apnode = pnode + bcp->partition_base_pnode;
-		cpu = pnode_to_first_cpu(apnode, smaster);
-		cpumask_set_cpu(cpu, mask);
-	}
-
-	/* IPI all cpus; preemption is already disabled */
-	smp_call_function_many(mask, do_reset, (void *)&reset_args, 1);
-	return;
-}
-
-/*
- * Not to be confused with cycles_2_ns() from tsc.c; this gives a relative
- * number, not an absolute. It converts a duration in cycles to a duration in
- * ns.
- */
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-	struct cyc2ns_data data;
-	unsigned long long ns;
-
-	cyc2ns_read_begin(&data);
-	ns = mul_u64_u32_shr(cyc, data.cyc2ns_mul, data.cyc2ns_shift);
-	cyc2ns_read_end();
-
-	return ns;
-}
-
-/*
- * The reverse of the above; converts a duration in ns to a duration in cycles.
- */
-static inline unsigned long long ns_2_cycles(unsigned long long ns)
-{
-	struct cyc2ns_data data;
-	unsigned long long cyc;
-
-	cyc2ns_read_begin(&data);
-	cyc = (ns << data.cyc2ns_shift) / data.cyc2ns_mul;
-	cyc2ns_read_end();
-
-	return cyc;
-}
-
-static inline unsigned long cycles_2_us(unsigned long long cyc)
-{
-	return cycles_2_ns(cyc) / NSEC_PER_USEC;
-}
-
-static inline cycles_t sec_2_cycles(unsigned long sec)
-{
-	return ns_2_cycles(sec * NSEC_PER_SEC);
-}
-
-static inline unsigned long long usec_2_cycles(unsigned long usec)
-{
-	return ns_2_cycles(usec * NSEC_PER_USEC);
-}
-
-/*
- * wait for all cpus on this hub to finish their sends and go quiet
- * leaves uvhub_quiesce set so that no new broadcasts are started by
- * bau_flush_send_and_wait()
- */
-static inline void quiesce_local_uvhub(struct bau_control *hmaster)
-{
-	atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce);
-}
-
-/*
- * mark this quiet-requestor as done
- */
-static inline void end_uvhub_quiesce(struct bau_control *hmaster)
-{
-	atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce);
-}
-
-/*
- * UV2 could have an extra bit of status in the ACTIVATION_STATUS_2 register.
- * But not currently used.
- */
-static unsigned long uv2_3_read_status(unsigned long offset, int rshft, int desc)
-{
-	return ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
-}
-
-/*
- * Entered when a bau descriptor has gone into a permanent busy wait because
- * of a hardware bug.
- * Workaround the bug.
- */
-static int handle_uv2_busy(struct bau_control *bcp)
-{
-	struct ptc_stats *stat = bcp->statp;
-
-	stat->s_uv2_wars++;
-	bcp->busy = 1;
-	return FLUSH_GIVEUP;
-}
-
-static int uv2_3_wait_completion(struct bau_desc *bau_desc,
-				struct bau_control *bcp, long try)
-{
-	unsigned long descriptor_stat;
-	cycles_t ttm;
-	u64 mmr_offset = bcp->status_mmr;
-	int right_shift = bcp->status_index;
-	int desc = bcp->uvhub_cpu;
-	long busy_reps = 0;
-	struct ptc_stats *stat = bcp->statp;
-
-	descriptor_stat = uv2_3_read_status(mmr_offset, right_shift, desc);
-
-	/* spin on the status MMR, waiting for it to go idle */
-	while (descriptor_stat != UV2H_DESC_IDLE) {
-		if (descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) {
-			/*
-			 * A h/w bug on the destination side may
-			 * have prevented the message being marked
-			 * pending, thus it doesn't get replied to
-			 * and gets continually nacked until it times
-			 * out with a SOURCE_TIMEOUT.
-			 */
-			stat->s_stimeout++;
-			return FLUSH_GIVEUP;
-		} else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) {
-			ttm = get_cycles();
-
-			/*
-			 * Our retries may be blocked by all destination
-			 * swack resources being consumed, and a timeout
-			 * pending.  In that case hardware returns the
-			 * ERROR that looks like a destination timeout.
-			 * Without using the extended status we have to
-			 * deduce from the short time that this was a
-			 * strong nack.
-			 */
-			if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
-				bcp->conseccompletes = 0;
-				stat->s_plugged++;
-				/* FLUSH_RETRY_PLUGGED causes hang on boot */
-				return FLUSH_GIVEUP;
-			}
-			stat->s_dtimeout++;
-			bcp->conseccompletes = 0;
-			/* FLUSH_RETRY_TIMEOUT causes hang on boot */
-			return FLUSH_GIVEUP;
-		} else {
-			busy_reps++;
-			if (busy_reps > 1000000) {
-				/* not to hammer on the clock */
-				busy_reps = 0;
-				ttm = get_cycles();
-				if ((ttm - bcp->send_message) > bcp->timeout_interval)
-					return handle_uv2_busy(bcp);
-			}
-			/*
-			 * descriptor_stat is still BUSY
-			 */
-			cpu_relax();
-		}
-		descriptor_stat = uv2_3_read_status(mmr_offset, right_shift, desc);
-	}
-	bcp->conseccompletes++;
-	return FLUSH_COMPLETE;
-}
-
-/*
- * Returns the status of current BAU message for cpu desc as a bit field
- * [Error][Busy][Aux]
- */
-static u64 read_status(u64 status_mmr, int index, int desc)
-{
-	u64 stat;
-
-	stat = ((read_lmmr(status_mmr) >> index) & UV_ACT_STATUS_MASK) << 1;
-	stat |= (read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_2) >> desc) & 0x1;
-
-	return stat;
-}
-
-static int uv4_wait_completion(struct bau_desc *bau_desc,
-				struct bau_control *bcp, long try)
-{
-	struct ptc_stats *stat = bcp->statp;
-	u64 descriptor_stat;
-	u64 mmr = bcp->status_mmr;
-	int index = bcp->status_index;
-	int desc = bcp->uvhub_cpu;
-
-	descriptor_stat = read_status(mmr, index, desc);
-
-	/* spin on the status MMR, waiting for it to go idle */
-	while (descriptor_stat != UV2H_DESC_IDLE) {
-		switch (descriptor_stat) {
-		case UV2H_DESC_SOURCE_TIMEOUT:
-			stat->s_stimeout++;
-			return FLUSH_GIVEUP;
-
-		case UV2H_DESC_DEST_TIMEOUT:
-			stat->s_dtimeout++;
-			bcp->conseccompletes = 0;
-			return FLUSH_RETRY_TIMEOUT;
-
-		case UV2H_DESC_DEST_STRONG_NACK:
-			stat->s_plugged++;
-			bcp->conseccompletes = 0;
-			return FLUSH_RETRY_PLUGGED;
-
-		case UV2H_DESC_DEST_PUT_ERR:
-			bcp->conseccompletes = 0;
-			return FLUSH_GIVEUP;
-
-		default:
-			/* descriptor_stat is still BUSY */
-			cpu_relax();
-		}
-		descriptor_stat = read_status(mmr, index, desc);
-	}
-	bcp->conseccompletes++;
-	return FLUSH_COMPLETE;
-}
-
-/*
- * Our retries are blocked by all destination sw ack resources being
- * in use, and a timeout is pending. In that case hardware immediately
- * returns the ERROR that looks like a destination timeout.
- */
-static void destination_plugged(struct bau_desc *bau_desc,
-			struct bau_control *bcp,
-			struct bau_control *hmaster, struct ptc_stats *stat)
-{
-	udelay(bcp->plugged_delay);
-	bcp->plugged_tries++;
-
-	if (bcp->plugged_tries >= bcp->plugsb4reset) {
-		bcp->plugged_tries = 0;
-
-		quiesce_local_uvhub(hmaster);
-
-		spin_lock(&hmaster->queue_lock);
-		reset_with_ipi(&bau_desc->distribution, bcp);
-		spin_unlock(&hmaster->queue_lock);
-
-		end_uvhub_quiesce(hmaster);
-
-		bcp->ipi_attempts++;
-		stat->s_resets_plug++;
-	}
-}
-
-static void destination_timeout(struct bau_desc *bau_desc,
-			struct bau_control *bcp, struct bau_control *hmaster,
-			struct ptc_stats *stat)
-{
-	hmaster->max_concurr = 1;
-	bcp->timeout_tries++;
-	if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
-		bcp->timeout_tries = 0;
-
-		quiesce_local_uvhub(hmaster);
-
-		spin_lock(&hmaster->queue_lock);
-		reset_with_ipi(&bau_desc->distribution, bcp);
-		spin_unlock(&hmaster->queue_lock);
-
-		end_uvhub_quiesce(hmaster);
-
-		bcp->ipi_attempts++;
-		stat->s_resets_timeout++;
-	}
-}
-
-/*
- * Stop all cpus on a uvhub from using the BAU for a period of time.
- * This is reversed by check_enable.
- */
-static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat)
-{
-	int tcpu;
-	struct bau_control *tbcp;
-	struct bau_control *hmaster;
-	cycles_t tm1;
-
-	hmaster = bcp->uvhub_master;
-	spin_lock(&hmaster->disable_lock);
-	if (!bcp->baudisabled) {
-		stat->s_bau_disabled++;
-		tm1 = get_cycles();
-		for_each_present_cpu(tcpu) {
-			tbcp = &per_cpu(bau_control, tcpu);
-			if (tbcp->uvhub_master == hmaster) {
-				tbcp->baudisabled = 1;
-				tbcp->set_bau_on_time =
-					tm1 + bcp->disabled_period;
-			}
-		}
-	}
-	spin_unlock(&hmaster->disable_lock);
-}
-
-static void count_max_concurr(int stat, struct bau_control *bcp,
-				struct bau_control *hmaster)
-{
-	bcp->plugged_tries = 0;
-	bcp->timeout_tries = 0;
-	if (stat != FLUSH_COMPLETE)
-		return;
-	if (bcp->conseccompletes <= bcp->complete_threshold)
-		return;
-	if (hmaster->max_concurr >= hmaster->max_concurr_const)
-		return;
-	hmaster->max_concurr++;
-}
-
-static void record_send_stats(cycles_t time1, cycles_t time2,
-		struct bau_control *bcp, struct ptc_stats *stat,
-		int completion_status, int try)
-{
-	cycles_t elapsed;
-
-	if (time2 > time1) {
-		elapsed = time2 - time1;
-		stat->s_time += elapsed;
-
-		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
-			bcp->period_requests++;
-			bcp->period_time += elapsed;
-			if ((elapsed > usec_2_cycles(bcp->cong_response_us)) &&
-			    (bcp->period_requests > bcp->cong_reps) &&
-			    ((bcp->period_time / bcp->period_requests) >
-					usec_2_cycles(bcp->cong_response_us))) {
-				stat->s_congested++;
-				disable_for_period(bcp, stat);
-			}
-		}
-	} else
-		stat->s_requestor--;
-
-	if (completion_status == FLUSH_COMPLETE && try > 1)
-		stat->s_retriesok++;
-	else if (completion_status == FLUSH_GIVEUP) {
-		stat->s_giveup++;
-		if (get_cycles() > bcp->period_end)
-			bcp->period_giveups = 0;
-		bcp->period_giveups++;
-		if (bcp->period_giveups == 1)
-			bcp->period_end = get_cycles() + bcp->disabled_period;
-		if (bcp->period_giveups > bcp->giveup_limit) {
-			disable_for_period(bcp, stat);
-			stat->s_giveuplimit++;
-		}
-	}
-}
-
-/*
- * Handle the completion status of a message send.
- */
-static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
-			struct bau_control *bcp, struct bau_control *hmaster,
-			struct ptc_stats *stat)
-{
-	if (completion_status == FLUSH_RETRY_PLUGGED)
-		destination_plugged(bau_desc, bcp, hmaster, stat);
-	else if (completion_status == FLUSH_RETRY_TIMEOUT)
-		destination_timeout(bau_desc, bcp, hmaster, stat);
-}
-
-/*
- * Send a broadcast and wait for it to complete.
- *
- * The flush_mask contains the cpus the broadcast is to be sent to including
- * cpus that are on the local uvhub.
- *
- * Returns 0 if all flushing represented in the mask was done.
- * Returns 1 if it gives up entirely and the original cpu mask is to be
- * returned to the kernel.
- */
-static int uv_flush_send_and_wait(struct cpumask *flush_mask,
-				  struct bau_control *bcp,
-				  struct bau_desc *bau_desc)
-{
-	int seq_number = 0;
-	int completion_stat = 0;
-	long try = 0;
-	unsigned long index;
-	cycles_t time1;
-	cycles_t time2;
-	struct ptc_stats *stat = bcp->statp;
-	struct bau_control *hmaster = bcp->uvhub_master;
-	struct uv2_3_bau_msg_header *uv2_3_hdr = NULL;
-
-	while (hmaster->uvhub_quiesce)
-		cpu_relax();
-
-	time1 = get_cycles();
-	uv2_3_hdr = &bau_desc->header.uv2_3_hdr;
-
-	do {
-		if (try == 0) {
-			uv2_3_hdr->msg_type = MSG_REGULAR;
-			seq_number = bcp->message_number++;
-		} else {
-			uv2_3_hdr->msg_type = MSG_RETRY;
-			stat->s_retry_messages++;
-		}
-
-		uv2_3_hdr->sequence = seq_number;
-		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
-		bcp->send_message = get_cycles();
-
-		write_mmr_activation(index);
-
-		try++;
-		completion_stat = ops.wait_completion(bau_desc, bcp, try);
-
-		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);
-
-		if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
-			bcp->ipi_attempts = 0;
-			stat->s_overipilimit++;
-			completion_stat = FLUSH_GIVEUP;
-			break;
-		}
-		cpu_relax();
-	} while ((completion_stat == FLUSH_RETRY_PLUGGED) ||
-		 (completion_stat == FLUSH_RETRY_TIMEOUT));
-
-	time2 = get_cycles();
-
-	count_max_concurr(completion_stat, bcp, hmaster);
-
-	while (hmaster->uvhub_quiesce)
-		cpu_relax();
-
-	atomic_dec(&hmaster->active_descriptor_count);
-
-	record_send_stats(time1, time2, bcp, stat, completion_stat, try);
-
-	if (completion_stat == FLUSH_GIVEUP)
-		/* FLUSH_GIVEUP will fall back to using IPI's for tlb flush */
-		return 1;
-	return 0;
-}
-
-/*
- * The BAU is disabled for this uvhub. When the disabled time period has
- * expired re-enable it.
- * Return 0 if it is re-enabled for all cpus on this uvhub.
- */
-static int check_enable(struct bau_control *bcp, struct ptc_stats *stat)
-{
-	int tcpu;
-	struct bau_control *tbcp;
-	struct bau_control *hmaster;
-
-	hmaster = bcp->uvhub_master;
-	spin_lock(&hmaster->disable_lock);
-	if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) {
-		stat->s_bau_reenabled++;
-		for_each_present_cpu(tcpu) {
-			tbcp = &per_cpu(bau_control, tcpu);
-			if (tbcp->uvhub_master == hmaster) {
-				tbcp->baudisabled = 0;
-				tbcp->period_requests = 0;
-				tbcp->period_time = 0;
-				tbcp->period_giveups = 0;
-			}
-		}
-		spin_unlock(&hmaster->disable_lock);
-		return 0;
-	}
-	spin_unlock(&hmaster->disable_lock);
-	return -1;
-}
-
-static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
-				int remotes, struct bau_desc *bau_desc)
-{
-	stat->s_requestor++;
-	stat->s_ntargcpu += remotes + locals;
-	stat->s_ntargremotes += remotes;
-	stat->s_ntarglocals += locals;
-
-	/* uvhub statistics */
-	hubs = bau_uvhub_weight(&bau_desc->distribution);
-	if (locals) {
-		stat->s_ntarglocaluvhub++;
-		stat->s_ntargremoteuvhub += (hubs - 1);
-	} else
-		stat->s_ntargremoteuvhub += hubs;
-
-	stat->s_ntarguvhub += hubs;
-
-	if (hubs >= 16)
-		stat->s_ntarguvhub16++;
-	else if (hubs >= 8)
-		stat->s_ntarguvhub8++;
-	else if (hubs >= 4)
-		stat->s_ntarguvhub4++;
-	else if (hubs >= 2)
-		stat->s_ntarguvhub2++;
-	else
-		stat->s_ntarguvhub1++;
-}
-
-/*
- * Translate a cpu mask to the uvhub distribution mask in the BAU
- * activation descriptor.
- */
-static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
-			struct bau_desc *bau_desc, int *localsp, int *remotesp)
-{
-	int cpu;
-	int pnode;
-	int cnt = 0;
-	struct hub_and_pnode *hpp;
-
-	for_each_cpu(cpu, flush_mask) {
-		/*
-		 * The distribution vector is a bit map of pnodes, relative
-		 * to the partition base pnode (and the partition base nasid
-		 * in the header).
-		 * Translate cpu to pnode and hub using a local memory array.
-		 */
-		hpp = &bcp->socket_master->thp[cpu];
-		pnode = hpp->pnode - bcp->partition_base_pnode;
-		bau_uvhub_set(pnode, &bau_desc->distribution);
-		cnt++;
-		if (hpp->uvhub == bcp->uvhub)
-			(*localsp)++;
-		else
-			(*remotesp)++;
-	}
-	if (!cnt)
-		return 1;
-	return 0;
-}
-
-/*
- * globally purge translation cache of a virtual address or all TLB's
- * @cpumask: mask of all cpu's in which the address is to be removed
- * @mm: mm_struct containing virtual address range
- * @start: start virtual address to be removed from TLB
- * @end: end virtual address to be remove from TLB
- * @cpu: the current cpu
- *
- * This is the entry point for initiating any UV global TLB shootdown.
- *
- * Purges the translation caches of all specified processors of the given
- * virtual address, or purges all TLB's on specified processors.
- *
- * The caller has derived the cpumask from the mm_struct.  This function
- * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
- *
- * The cpumask is converted into a uvhubmask of the uvhubs containing
- * those cpus.
- *
- * Note that this function should be called with preemption disabled.
- *
- * Returns NULL if all remote flushing was done.
- * Returns pointer to cpumask if some remote flushing remains to be
- * done.  The returned pointer is valid till preemption is re-enabled.
- */
-const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
-					  const struct flush_tlb_info *info)
-{
-	unsigned int cpu = smp_processor_id();
-	int locals = 0, remotes = 0, hubs = 0;
-	struct bau_desc *bau_desc;
-	struct cpumask *flush_mask;
-	struct ptc_stats *stat;
-	struct bau_control *bcp;
-	unsigned long descriptor_status, status, address;
-
-	bcp = &per_cpu(bau_control, cpu);
-
-	if (bcp->nobau)
-		return cpumask;
-
-	stat = bcp->statp;
-	stat->s_enters++;
-
-	if (bcp->busy) {
-		descriptor_status =
-			read_lmmr(UVH_LB_BAU_SB_ACTIVATION_STATUS_0);
-		status = ((descriptor_status >> (bcp->uvhub_cpu *
-			UV_ACT_STATUS_SIZE)) & UV_ACT_STATUS_MASK) << 1;
-		if (status == UV2H_DESC_BUSY)
-			return cpumask;
-		bcp->busy = 0;
-	}
-
-	/* bau was disabled due to slow response */
-	if (bcp->baudisabled) {
-		if (check_enable(bcp, stat)) {
-			stat->s_ipifordisabled++;
-			return cpumask;
-		}
-	}
-
-	/*
-	 * Each sending cpu has a per-cpu mask which it fills from the caller's
-	 * cpu mask.  All cpus are converted to uvhubs and copied to the
-	 * activation descriptor.
-	 */
-	flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
-	/* don't actually do a shootdown of the local cpu */
-	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
-
-	if (cpumask_test_cpu(cpu, cpumask))
-		stat->s_ntargself++;
-
-	bau_desc = bcp->descriptor_base;
-	bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
-	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
-	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
-		return NULL;
-
-	record_send_statistics(stat, locals, hubs, remotes, bau_desc);
-
-	if (!info->end || (info->end - info->start) <= PAGE_SIZE)
-		address = info->start;
-	else
-		address = TLB_FLUSH_ALL;
-
-	switch (bcp->uvhub_version) {
-	case UV_BAU_V2:
-	case UV_BAU_V3:
-		bau_desc->payload.uv2_3.address = address;
-		bau_desc->payload.uv2_3.sending_cpu = cpu;
-		break;
-	case UV_BAU_V4:
-		bau_desc->payload.uv4.address = address;
-		bau_desc->payload.uv4.sending_cpu = cpu;
-		bau_desc->payload.uv4.qualifier = BAU_DESC_QUALIFIER;
-		break;
-	}
-
-	/*
-	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
-	 * or 1 if it gave up and the original cpumask should be returned.
-	 */
-	if (!uv_flush_send_and_wait(flush_mask, bcp, bau_desc))
-		return NULL;
-	else
-		return cpumask;
-}
-
-/*
- * Search the message queue for any 'other' unprocessed message with the
- * same software acknowledge resource bit vector as the 'msg' message.
- */
-static struct bau_pq_entry *find_another_by_swack(struct bau_pq_entry *msg,
-						  struct bau_control *bcp)
-{
-	struct bau_pq_entry *msg_next = msg + 1;
-	unsigned char swack_vec = msg->swack_vec;
-
-	if (msg_next > bcp->queue_last)
-		msg_next = bcp->queue_first;
-	while (msg_next != msg) {
-		if ((msg_next->canceled == 0) && (msg_next->replied_to == 0) &&
-				(msg_next->swack_vec == swack_vec))
-			return msg_next;
-		msg_next++;
-		if (msg_next > bcp->queue_last)
-			msg_next = bcp->queue_first;
-	}
-	return NULL;
-}
-
-/*
- * UV2 needs to work around a bug in which an arriving message has not
- * set a bit in the UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE register.
- * Such a message must be ignored.
- */
-static void process_uv2_message(struct msg_desc *mdp, struct bau_control *bcp)
-{
-	unsigned long mmr_image;
-	unsigned char swack_vec;
-	struct bau_pq_entry *msg = mdp->msg;
-	struct bau_pq_entry *other_msg;
-
-	mmr_image = ops.read_l_sw_ack();
-	swack_vec = msg->swack_vec;
-
-	if ((swack_vec & mmr_image) == 0) {
-		/*
-		 * This message was assigned a swack resource, but no
-		 * reserved acknowlegment is pending.
-		 * The bug has prevented this message from setting the MMR.
-		 */
-		/*
-		 * Some message has set the MMR 'pending' bit; it might have
-		 * been another message.  Look for that message.
-		 */
-		other_msg = find_another_by_swack(msg, bcp);
-		if (other_msg) {
-			/*
-			 * There is another. Process this one but do not
-			 * ack it.
-			 */
-			bau_process_message(mdp, bcp, 0);
-			/*
-			 * Let the natural processing of that other message
-			 * acknowledge it. Don't get the processing of sw_ack's
-			 * out of order.
-			 */
-			return;
-		}
-	}
-
-	/*
-	 * Either the MMR shows this one pending a reply or there is no
-	 * other message using this sw_ack, so it is safe to acknowledge it.
-	 */
-	bau_process_message(mdp, bcp, 1);
-
-	return;
-}
-
-/*
- * The BAU message interrupt comes here. (registered by set_intr_gate)
- * See entry_64.S
- *
- * We received a broadcast assist message.
- *
- * Interrupts are disabled; this interrupt could represent
- * the receipt of several messages.
- *
- * All cores/threads on this hub get this interrupt.
- * The last one to see it does the software ack.
- * (the resource will not be freed until noninterruptable cpus see this
- *  interrupt; hardware may timeout the s/w ack and reply ERROR)
- */
-DEFINE_IDTENTRY_SYSVEC(sysvec_uv_bau_message)
-{
-	int count = 0;
-	cycles_t time_start;
-	struct bau_pq_entry *msg;
-	struct bau_control *bcp;
-	struct ptc_stats *stat;
-	struct msg_desc msgdesc;
-
-	ack_APIC_irq();
-	kvm_set_cpu_l1tf_flush_l1d();
-	time_start = get_cycles();
-
-	bcp = &per_cpu(bau_control, smp_processor_id());
-	stat = bcp->statp;
-
-	msgdesc.queue_first = bcp->queue_first;
-	msgdesc.queue_last = bcp->queue_last;
-
-	msg = bcp->bau_msg_head;
-	while (msg->swack_vec) {
-		count++;
-
-		msgdesc.msg_slot = msg - msgdesc.queue_first;
-		msgdesc.msg = msg;
-		if (bcp->uvhub_version == UV_BAU_V2)
-			process_uv2_message(&msgdesc, bcp);
-		else
-			/* no error workaround for uv3 */
-			bau_process_message(&msgdesc, bcp, 1);
-
-		msg++;
-		if (msg > msgdesc.queue_last)
-			msg = msgdesc.queue_first;
-		bcp->bau_msg_head = msg;
-	}
-	stat->d_time += (get_cycles() - time_start);
-	if (!count)
-		stat->d_nomsg++;
-	else if (count > 1)
-		stat->d_multmsg++;
-}
-
-/*
- * Each target uvhub (i.e. a uvhub that has cpu's) needs to have
- * shootdown message timeouts enabled.  The timeout does not cause
- * an interrupt, but causes an error message to be returned to
- * the sender.
- */
-static void __init enable_timeouts(void)
-{
-	int uvhub;
-	int nuvhubs;
-	int pnode;
-	unsigned long mmr_image;
-
-	nuvhubs = uv_num_possible_blades();
-
-	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
-		if (!uv_blade_nr_possible_cpus(uvhub))
-			continue;
-
-		pnode = uv_blade_to_pnode(uvhub);
-		mmr_image = read_mmr_misc_control(pnode);
-		/*
-		 * Set the timeout period and then lock it in, in three
-		 * steps; captures and locks in the period.
-		 *
-		 * To program the period, the SOFT_ACK_MODE must be off.
-		 */
-		mmr_image &= ~(1L << SOFTACK_MSHIFT);
-		write_mmr_misc_control(pnode, mmr_image);
-		/*
-		 * Set the 4-bit period.
-		 */
-		mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT);
-		mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT);
-		write_mmr_misc_control(pnode, mmr_image);
-
-		mmr_image |= (1L << SOFTACK_MSHIFT);
-		if (is_uv2_hub()) {
-			/* do not touch the legacy mode bit */
-			/* hw bug workaround; do not use extended status */
-			mmr_image &= ~(1L << UV2_EXT_SHFT);
-		} else if (is_uv3_hub()) {
-			mmr_image &= ~(1L << PREFETCH_HINT_SHFT);
-			mmr_image |= (1L << SB_STATUS_SHFT);
-		}
-		write_mmr_misc_control(pnode, mmr_image);
-	}
-}
-
-static void *ptc_seq_start(struct seq_file *file, loff_t *offset)
-{
-	if (*offset < num_possible_cpus())
-		return offset;
-	return NULL;
-}
-
-static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
-{
-	(*offset)++;
-	if (*offset < num_possible_cpus())
-		return offset;
-	return NULL;
-}
-
-static void ptc_seq_stop(struct seq_file *file, void *data)
-{
-}
-
-/*
- * Display the statistics thru /proc/sgi_uv/ptc_statistics
- * 'data' points to the cpu number
- * Note: see the descriptions in stat_description[].
- */
-static int ptc_seq_show(struct seq_file *file, void *data)
-{
-	struct ptc_stats *stat;
-	struct bau_control *bcp;
-	int cpu;
-
-	cpu = *(loff_t *)data;
-	if (!cpu) {
-		seq_puts(file,
-			 "# cpu bauoff sent stime self locals remotes ncpus localhub ");
-		seq_puts(file, "remotehub numuvhubs numuvhubs16 numuvhubs8 ");
-		seq_puts(file,
-			 "numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries ");
-		seq_puts(file,
-			 "rok resetp resett giveup sto bz throt disable ");
-		seq_puts(file,
-			 "enable wars warshw warwaits enters ipidis plugged ");
-		seq_puts(file,
-			 "ipiover glim cong swack recv rtime all one mult ");
-		seq_puts(file, "none retry canc nocan reset rcan\n");
-	}
-	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
-		bcp = &per_cpu(bau_control, cpu);
-		if (bcp->nobau) {
-			seq_printf(file, "cpu %d bau disabled\n", cpu);
-			return 0;
-		}
-		stat = bcp->statp;
-		/* source side statistics */
-		seq_printf(file,
-			"cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
-			   cpu, bcp->nobau, stat->s_requestor,
-			   cycles_2_us(stat->s_time),
-			   stat->s_ntargself, stat->s_ntarglocals,
-			   stat->s_ntargremotes, stat->s_ntargcpu,
-			   stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
-			   stat->s_ntarguvhub, stat->s_ntarguvhub16);
-		seq_printf(file, "%ld %ld %ld %ld %ld %ld ",
-			   stat->s_ntarguvhub8, stat->s_ntarguvhub4,
-			   stat->s_ntarguvhub2, stat->s_ntarguvhub1,
-			   stat->s_dtimeout, stat->s_strongnacks);
-		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
-			   stat->s_retry_messages, stat->s_retriesok,
-			   stat->s_resets_plug, stat->s_resets_timeout,
-			   stat->s_giveup, stat->s_stimeout,
-			   stat->s_busy, stat->s_throttles);
-		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
-			   stat->s_bau_disabled, stat->s_bau_reenabled,
-			   stat->s_uv2_wars, stat->s_uv2_wars_hw,
-			   stat->s_uv2_war_waits, stat->s_enters,
-			   stat->s_ipifordisabled, stat->s_plugged,
-			   stat->s_overipilimit, stat->s_giveuplimit,
-			   stat->s_congested);
-
-		/* destination side statistics */
-		seq_printf(file,
-			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
-			   ops.read_g_sw_ack(uv_cpu_to_pnode(cpu)),
-			   stat->d_requestee, cycles_2_us(stat->d_time),
-			   stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
-			   stat->d_nomsg, stat->d_retries, stat->d_canceled,
-			   stat->d_nocanceled, stat->d_resets,
-			   stat->d_rcanceled);
-	}
-	return 0;
-}
-
-/*
- * Display the tunables thru debugfs
- */
-static ssize_t tunables_read(struct file *file, char __user *userbuf,
-				size_t count, loff_t *ppos)
-{
-	char *buf;
-	int ret;
-
-	buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d %d\n",
-		"max_concur plugged_delay plugsb4reset timeoutsb4reset",
-		"ipi_reset_limit complete_threshold congested_response_us",
-		"congested_reps disabled_period giveup_limit",
-		max_concurr, plugged_delay, plugsb4reset,
-		timeoutsb4reset, ipi_reset_limit, complete_threshold,
-		congested_respns_us, congested_reps, disabled_period,
-		giveup_limit);
-
-	if (!buf)
-		return -ENOMEM;
-
-	ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
-	kfree(buf);
-	return ret;
-}
-
-/*
- * handle a write to /proc/sgi_uv/ptc_statistics
- * -1: reset the statistics
- *  0: display meaning of the statistics
- */
-static ssize_t ptc_proc_write(struct file *file, const char __user *user,
-				size_t count, loff_t *data)
-{
-	int cpu;
-	int i;
-	int elements;
-	long input_arg;
-	char optstr[64];
-	struct ptc_stats *stat;
-
-	if (count == 0 || count > sizeof(optstr))
-		return -EINVAL;
-	if (copy_from_user(optstr, user, count))
-		return -EFAULT;
-	optstr[count - 1] = '\0';
-
-	if (!strcmp(optstr, "on")) {
-		set_bau_on();
-		return count;
-	} else if (!strcmp(optstr, "off")) {
-		set_bau_off();
-		return count;
-	}
-
-	if (kstrtol(optstr, 10, &input_arg) < 0) {
-		pr_debug("%s is invalid\n", optstr);
-		return -EINVAL;
-	}
-
-	if (input_arg == 0) {
-		elements = ARRAY_SIZE(stat_description);
-		pr_debug("# cpu:      cpu number\n");
-		pr_debug("Sender statistics:\n");
-		for (i = 0; i < elements; i++)
-			pr_debug("%s\n", stat_description[i]);
-	} else if (input_arg == -1) {
-		for_each_present_cpu(cpu) {
-			stat = &per_cpu(ptcstats, cpu);
-			memset(stat, 0, sizeof(struct ptc_stats));
-		}
-	}
-
-	return count;
-}
-
-static int local_atoi(const char *name)
-{
-	int val = 0;
-
-	for (;; name++) {
-		switch (*name) {
-		case '0' ... '9':
-			val = 10*val+(*name-'0');
-			break;
-		default:
-			return val;
-		}
-	}
-}
-
-/*
- * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables.
- * Zero values reset them to defaults.
- */
-static int parse_tunables_write(struct bau_control *bcp, char *instr,
-				int count)
-{
-	char *p;
-	char *q;
-	int cnt = 0;
-	int val;
-	int e = ARRAY_SIZE(tunables);
-
-	p = instr + strspn(instr, WHITESPACE);
-	q = p;
-	for (; *p; p = q + strspn(q, WHITESPACE)) {
-		q = p + strcspn(p, WHITESPACE);
-		cnt++;
-		if (q == p)
-			break;
-	}
-	if (cnt != e) {
-		pr_info("bau tunable error: should be %d values\n", e);
-		return -EINVAL;
-	}
-
-	p = instr + strspn(instr, WHITESPACE);
-	q = p;
-	for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) {
-		q = p + strcspn(p, WHITESPACE);
-		val = local_atoi(p);
-		switch (cnt) {
-		case 0:
-			if (val == 0) {
-				max_concurr = MAX_BAU_CONCURRENT;
-				max_concurr_const = MAX_BAU_CONCURRENT;
-				continue;
-			}
-			if (val < 1 || val > bcp->cpus_in_uvhub) {
-				pr_debug(
-				"Error: BAU max concurrent %d is invalid\n",
-				val);
-				return -EINVAL;
-			}
-			max_concurr = val;
-			max_concurr_const = val;
-			continue;
-		default:
-			if (val == 0)
-				*tunables[cnt].tunp = tunables[cnt].deflt;
-			else
-				*tunables[cnt].tunp = val;
-			continue;
-		}
-	}
-	return 0;
-}
-
-/*
- * Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables)
- */
-static ssize_t tunables_write(struct file *file, const char __user *user,
-				size_t count, loff_t *data)
-{
-	int cpu;
-	int ret;
-	char instr[100];
-	struct bau_control *bcp;
-
-	if (count == 0 || count > sizeof(instr)-1)
-		return -EINVAL;
-	if (copy_from_user(instr, user, count))
-		return -EFAULT;
-
-	instr[count] = '\0';
-
-	cpu = get_cpu();
-	bcp = &per_cpu(bau_control, cpu);
-	ret = parse_tunables_write(bcp, instr, count);
-	put_cpu();
-	if (ret)
-		return ret;
-
-	for_each_present_cpu(cpu) {
-		bcp = &per_cpu(bau_control, cpu);
-		bcp->max_concurr         = max_concurr;
-		bcp->max_concurr_const   = max_concurr;
-		bcp->plugged_delay       = plugged_delay;
-		bcp->plugsb4reset        = plugsb4reset;
-		bcp->timeoutsb4reset     = timeoutsb4reset;
-		bcp->ipi_reset_limit     = ipi_reset_limit;
-		bcp->complete_threshold  = complete_threshold;
-		bcp->cong_response_us    = congested_respns_us;
-		bcp->cong_reps           = congested_reps;
-		bcp->disabled_period     = sec_2_cycles(disabled_period);
-		bcp->giveup_limit        = giveup_limit;
-	}
-	return count;
-}
-
-static const struct seq_operations uv_ptc_seq_ops = {
-	.start		= ptc_seq_start,
-	.next		= ptc_seq_next,
-	.stop		= ptc_seq_stop,
-	.show		= ptc_seq_show
-};
-
-static int ptc_proc_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &uv_ptc_seq_ops);
-}
-
-static int tunables_open(struct inode *inode, struct file *file)
-{
-	return 0;
-}
-
-static const struct proc_ops uv_ptc_proc_ops = {
-	.proc_open	= ptc_proc_open,
-	.proc_read	= seq_read,
-	.proc_write	= ptc_proc_write,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= seq_release,
-};
-
-static const struct file_operations tunables_fops = {
-	.open		= tunables_open,
-	.read		= tunables_read,
-	.write		= tunables_write,
-	.llseek		= default_llseek,
-};
-
-static int __init uv_ptc_init(void)
-{
-	struct proc_dir_entry *proc_uv_ptc;
-
-	if (!is_uv_system())
-		return 0;
-
-	proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
-				  &uv_ptc_proc_ops);
-	if (!proc_uv_ptc) {
-		pr_err("unable to create %s proc entry\n",
-		       UV_PTC_BASENAME);
-		return -EINVAL;
-	}
-
-	tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
-	debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, tunables_dir, NULL,
-			    &tunables_fops);
-	return 0;
-}
-
-/*
- * Initialize the sending side's sending buffers.
- */
-static void activation_descriptor_init(int node, int pnode, int base_pnode)
-{
-	int i;
-	int cpu;
-	unsigned long gpa;
-	unsigned long m;
-	unsigned long n;
-	size_t dsize;
-	struct bau_desc *bau_desc;
-	struct bau_desc *bd2;
-	struct uv2_3_bau_msg_header *uv2_3_hdr;
-	struct bau_control *bcp;
-
-	/*
-	 * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC)
-	 * per cpu; and one per cpu on the uvhub (ADP_SZ)
-	 */
-	dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC;
-	bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);
-	BUG_ON(!bau_desc);
-
-	gpa = uv_gpa(bau_desc);
-	n = uv_gpa_to_gnode(gpa);
-	m = ops.bau_gpa_to_offset(gpa);
-
-	/* the 14-bit pnode */
-	write_mmr_descriptor_base(pnode,
-		(n << UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT | m));
-	/*
-	 * Initializing all 8 (ITEMS_PER_DESC) descriptors for each
-	 * cpu even though we only use the first one; one descriptor can
-	 * describe a broadcast to 256 uv hubs.
-	 */
-	for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
-		memset(bd2, 0, sizeof(struct bau_desc));
-		/*
-		 * BIOS uses legacy mode, but uv2 and uv3 hardware always
-		 * uses native mode for selective broadcasts.
-		 */
-		uv2_3_hdr = &bd2->header.uv2_3_hdr;
-		uv2_3_hdr->swack_flag      = 1;
-		uv2_3_hdr->base_dest_nasid = UV_PNODE_TO_NASID(base_pnode);
-		uv2_3_hdr->dest_subnodeid  = UV_LB_SUBNODEID;
-		uv2_3_hdr->command         = UV_NET_ENDPOINT_INTD;
-	}
-	for_each_present_cpu(cpu) {
-		if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
-			continue;
-		bcp = &per_cpu(bau_control, cpu);
-		bcp->descriptor_base = bau_desc;
-	}
-}
-
-/*
- * initialize the destination side's receiving buffers
- * entered for each uvhub in the partition
- * - node is first node (kernel memory notion) on the uvhub
- * - pnode is the uvhub's physical identifier
- */
-static void pq_init(int node, int pnode)
-{
-	int cpu;
-	size_t plsize;
-	char *cp;
-	void *vp;
-	unsigned long gnode, first, last, tail;
-	struct bau_pq_entry *pqp;
-	struct bau_control *bcp;
-
-	plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry);
-	vp = kmalloc_node(plsize, GFP_KERNEL, node);
-	BUG_ON(!vp);
-
-	pqp = (struct bau_pq_entry *)vp;
-	cp = (char *)pqp + 31;
-	pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5);
-
-	for_each_present_cpu(cpu) {
-		if (pnode != uv_cpu_to_pnode(cpu))
-			continue;
-		/* for every cpu on this pnode: */
-		bcp = &per_cpu(bau_control, cpu);
-		bcp->queue_first	= pqp;
-		bcp->bau_msg_head	= pqp;
-		bcp->queue_last		= pqp + (DEST_Q_SIZE - 1);
-	}
-
-	first = ops.bau_gpa_to_offset(uv_gpa(pqp));
-	last = ops.bau_gpa_to_offset(uv_gpa(pqp + (DEST_Q_SIZE - 1)));
-
-	/*
-	 * Pre UV4, the gnode is required to locate the payload queue
-	 * and the payload queue tail must be maintained by the kernel.
-	 */
-	bcp = &per_cpu(bau_control, smp_processor_id());
-	if (bcp->uvhub_version <= UV_BAU_V3) {
-		tail = first;
-		gnode = uv_gpa_to_gnode(uv_gpa(pqp));
-		first = (gnode << UV_PAYLOADQ_GNODE_SHIFT) | tail;
-		write_mmr_payload_tail(pnode, tail);
-	}
-
-	ops.write_payload_first(pnode, first);
-	ops.write_payload_last(pnode, last);
-
-	/* in effect, all msg_type's are set to MSG_NOOP */
-	memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
-}
-
-/*
- * Initialization of each UV hub's structures
- */
-static void __init init_uvhub(int uvhub, int vector, int base_pnode)
-{
-	int node;
-	int pnode;
-	unsigned long apicid;
-
-	node = uvhub_to_first_node(uvhub);
-	pnode = uv_blade_to_pnode(uvhub);
-
-	activation_descriptor_init(node, pnode, base_pnode);
-
-	pq_init(node, pnode);
-	/*
-	 * The below initialization can't be in firmware because the
-	 * messaging IRQ will be determined by the OS.
-	 */
-	apicid = uvhub_to_first_apicid(uvhub);
-	write_mmr_data_config(pnode, ((apicid << 32) | vector));
-}
-
-/*
- * We will set BAU_MISC_CONTROL with a timeout period.
- * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
- * So the destination timeout period has to be calculated from them.
- */
-static int calculate_destination_timeout(void)
-{
-	unsigned long mmr_image;
-	int mult1;
-	int base;
-	int ret;
-
-	/* same destination timeout for uv2 and uv3 */
-	/* 4 bits  0/1 for 10/80us base, 3 bits of multiplier */
-	mmr_image = uv_read_local_mmr(UVH_LB_BAU_MISC_CONTROL);
-	mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT;
-	if (mmr_image & (1L << UV2_ACK_UNITS_SHFT))
-		base = 80;
-	else
-		base = 10;
-	mult1 = mmr_image & UV2_ACK_MASK;
-	ret = mult1 * base;
-
-	return ret;
-}
-
-static void __init init_per_cpu_tunables(void)
-{
-	int cpu;
-	struct bau_control *bcp;
-
-	for_each_present_cpu(cpu) {
-		bcp = &per_cpu(bau_control, cpu);
-		bcp->baudisabled		= 0;
-		if (nobau)
-			bcp->nobau		= true;
-		bcp->statp			= &per_cpu(ptcstats, cpu);
-		/* time interval to catch a hardware stay-busy bug */
-		bcp->timeout_interval		= usec_2_cycles(2*timeout_us);
-		bcp->max_concurr		= max_concurr;
-		bcp->max_concurr_const		= max_concurr;
-		bcp->plugged_delay		= plugged_delay;
-		bcp->plugsb4reset		= plugsb4reset;
-		bcp->timeoutsb4reset		= timeoutsb4reset;
-		bcp->ipi_reset_limit		= ipi_reset_limit;
-		bcp->complete_threshold		= complete_threshold;
-		bcp->cong_response_us		= congested_respns_us;
-		bcp->cong_reps			= congested_reps;
-		bcp->disabled_period		= sec_2_cycles(disabled_period);
-		bcp->giveup_limit		= giveup_limit;
-		spin_lock_init(&bcp->queue_lock);
-		spin_lock_init(&bcp->uvhub_lock);
-		spin_lock_init(&bcp->disable_lock);
-	}
-}
-
-/*
- * Scan all cpus to collect blade and socket summaries.
- */
-static int __init get_cpu_topology(int base_pnode,
-					struct uvhub_desc *uvhub_descs,
-					unsigned char *uvhub_mask)
-{
-	int cpu;
-	int pnode;
-	int uvhub;
-	int socket;
-	struct bau_control *bcp;
-	struct uvhub_desc *bdp;
-	struct socket_desc *sdp;
-
-	for_each_present_cpu(cpu) {
-		bcp = &per_cpu(bau_control, cpu);
-
-		memset(bcp, 0, sizeof(struct bau_control));
-
-		pnode = uv_cpu_hub_info(cpu)->pnode;
-		if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) {
-			pr_emerg(
-				"cpu %d pnode %d-%d beyond %d; BAU disabled\n",
-				cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE);
-			return 1;
-		}
-
-		bcp->osnode = cpu_to_node(cpu);
-		bcp->partition_base_pnode = base_pnode;
-
-		uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
-		*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
-		bdp = &uvhub_descs[uvhub];
-
-		bdp->num_cpus++;
-		bdp->uvhub = uvhub;
-		bdp->pnode = pnode;
-
-		/* kludge: 'assuming' one node per socket, and assuming that
-		   disabling a socket just leaves a gap in node numbers */
-		socket = bcp->osnode & 1;
-		bdp->socket_mask |= (1 << socket);
-		sdp = &bdp->socket[socket];
-		sdp->cpu_number[sdp->num_cpus] = cpu;
-		sdp->num_cpus++;
-		if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
-			pr_emerg("%d cpus per socket invalid\n",
-				sdp->num_cpus);
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * Each socket is to get a local array of pnodes/hubs.
- */
-static void make_per_cpu_thp(struct bau_control *smaster)
-{
-	int cpu;
-	size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus();
-
-	smaster->thp = kzalloc_node(hpsz, GFP_KERNEL, smaster->osnode);
-	for_each_present_cpu(cpu) {
-		smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode;
-		smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
-	}
-}
-
-/*
- * Each uvhub is to get a local cpumask.
- */
-static void make_per_hub_cpumask(struct bau_control *hmaster)
-{
-	int sz = sizeof(cpumask_t);
-
-	hmaster->cpumask = kzalloc_node(sz, GFP_KERNEL, hmaster->osnode);
-}
-
-/*
- * Initialize all the per_cpu information for the cpu's on a given socket,
- * given what has been gathered into the socket_desc struct.
- * And reports the chosen hub and socket masters back to the caller.
- */
-static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
-			struct bau_control **smasterp,
-			struct bau_control **hmasterp)
-{
-	int i, cpu, uvhub_cpu;
-	struct bau_control *bcp;
-
-	for (i = 0; i < sdp->num_cpus; i++) {
-		cpu = sdp->cpu_number[i];
-		bcp = &per_cpu(bau_control, cpu);
-		bcp->cpu = cpu;
-		if (i == 0) {
-			*smasterp = bcp;
-			if (!(*hmasterp))
-				*hmasterp = bcp;
-		}
-		bcp->cpus_in_uvhub = bdp->num_cpus;
-		bcp->cpus_in_socket = sdp->num_cpus;
-		bcp->socket_master = *smasterp;
-		bcp->uvhub = bdp->uvhub;
-		if (is_uv2_hub())
-			bcp->uvhub_version = UV_BAU_V2;
-		else if (is_uv3_hub())
-			bcp->uvhub_version = UV_BAU_V3;
-		else if (is_uv4_hub())
-			bcp->uvhub_version = UV_BAU_V4;
-		else {
-			pr_emerg("uvhub version not 1, 2, 3, or 4\n");
-			return 1;
-		}
-		bcp->uvhub_master = *hmasterp;
-		uvhub_cpu = uv_cpu_blade_processor_id(cpu);
-		bcp->uvhub_cpu = uvhub_cpu;
-
-		/*
-		 * The ERROR and BUSY status registers are located pairwise over
-		 * the STATUS_0 and STATUS_1 mmrs; each an array[32] of 2 bits.
-		 */
-		if (uvhub_cpu < UV_CPUS_PER_AS) {
-			bcp->status_mmr = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
-			bcp->status_index = uvhub_cpu * UV_ACT_STATUS_SIZE;
-		} else {
-			bcp->status_mmr = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
-			bcp->status_index = (uvhub_cpu - UV_CPUS_PER_AS)
-						* UV_ACT_STATUS_SIZE;
-		}
-
-		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
-			pr_emerg("%d cpus per uvhub invalid\n",
-				bcp->uvhub_cpu);
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * Summarize the blade and socket topology into the per_cpu structures.
- */
-static int __init summarize_uvhub_sockets(int nuvhubs,
-			struct uvhub_desc *uvhub_descs,
-			unsigned char *uvhub_mask)
-{
-	int socket;
-	int uvhub;
-	unsigned short socket_mask;
-
-	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
-		struct uvhub_desc *bdp;
-		struct bau_control *smaster = NULL;
-		struct bau_control *hmaster = NULL;
-
-		if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
-			continue;
-
-		bdp = &uvhub_descs[uvhub];
-		socket_mask = bdp->socket_mask;
-		socket = 0;
-		while (socket_mask) {
-			struct socket_desc *sdp;
-			if ((socket_mask & 1)) {
-				sdp = &bdp->socket[socket];
-				if (scan_sock(sdp, bdp, &smaster, &hmaster))
-					return 1;
-				make_per_cpu_thp(smaster);
-			}
-			socket++;
-			socket_mask = (socket_mask >> 1);
-		}
-		make_per_hub_cpumask(hmaster);
-	}
-	return 0;
-}
-
-/*
- * initialize the bau_control structure for each cpu
- */
-static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
-{
-	struct uvhub_desc *uvhub_descs;
-	unsigned char *uvhub_mask = NULL;
-
-	if (is_uv3_hub() || is_uv2_hub())
-		timeout_us = calculate_destination_timeout();
-
-	uvhub_descs = kcalloc(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL);
-	if (!uvhub_descs)
-		goto fail;
-
-	uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
-	if (!uvhub_mask)
-		goto fail;
-
-	if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
-		goto fail;
-
-	if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask))
-		goto fail;
-
-	kfree(uvhub_descs);
-	kfree(uvhub_mask);
-	init_per_cpu_tunables();
-	return 0;
-
-fail:
-	kfree(uvhub_descs);
-	kfree(uvhub_mask);
-	return 1;
-}
-
-static const struct bau_operations uv2_3_bau_ops __initconst = {
-	.bau_gpa_to_offset       = uv_gpa_to_offset,
-	.read_l_sw_ack           = read_mmr_sw_ack,
-	.read_g_sw_ack           = read_gmmr_sw_ack,
-	.write_l_sw_ack          = write_mmr_sw_ack,
-	.write_g_sw_ack          = write_gmmr_sw_ack,
-	.write_payload_first     = write_mmr_payload_first,
-	.write_payload_last      = write_mmr_payload_last,
-	.wait_completion	 = uv2_3_wait_completion,
-};
-
-static const struct bau_operations uv4_bau_ops __initconst = {
-	.bau_gpa_to_offset       = uv_gpa_to_soc_phys_ram,
-	.read_l_sw_ack           = read_mmr_proc_sw_ack,
-	.read_g_sw_ack           = read_gmmr_proc_sw_ack,
-	.write_l_sw_ack          = write_mmr_proc_sw_ack,
-	.write_g_sw_ack          = write_gmmr_proc_sw_ack,
-	.write_payload_first     = write_mmr_proc_payload_first,
-	.write_payload_last      = write_mmr_proc_payload_last,
-	.wait_completion         = uv4_wait_completion,
-};
-
-/*
- * Initialization of BAU-related structures
- */
-static int __init uv_bau_init(void)
-{
-	int uvhub;
-	int pnode;
-	int nuvhubs;
-	int cur_cpu;
-	int cpus;
-	int vector;
-	cpumask_var_t *mask;
-
-	if (!is_uv_system())
-		return 0;
-
-	if (is_uv4_hub())
-		ops = uv4_bau_ops;
-	else if (is_uv3_hub())
-		ops = uv2_3_bau_ops;
-	else if (is_uv2_hub())
-		ops = uv2_3_bau_ops;
-
-	nuvhubs = uv_num_possible_blades();
-	if (nuvhubs < 2) {
-		pr_crit("UV: BAU disabled - insufficient hub count\n");
-		goto err_bau_disable;
-	}
-
-	for_each_possible_cpu(cur_cpu) {
-		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
-		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
-	}
-
-	uv_base_pnode = 0x7fffffff;
-	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
-		cpus = uv_blade_nr_possible_cpus(uvhub);
-		if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode))
-			uv_base_pnode = uv_blade_to_pnode(uvhub);
-	}
-
-	/* software timeouts are not supported on UV4 */
-	if (is_uv3_hub() || is_uv2_hub())
-		enable_timeouts();
-
-	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
-		pr_crit("UV: BAU disabled - per CPU init failed\n");
-		goto err_bau_disable;
-	}
-
-	vector = UV_BAU_MESSAGE;
-	for_each_possible_blade(uvhub) {
-		if (uv_blade_nr_possible_cpus(uvhub))
-			init_uvhub(uvhub, vector, uv_base_pnode);
-	}
-
-	for_each_possible_blade(uvhub) {
-		if (uv_blade_nr_possible_cpus(uvhub)) {
-			unsigned long val;
-			unsigned long mmr;
-			pnode = uv_blade_to_pnode(uvhub);
-			/* INIT the bau */
-			val = 1L << 63;
-			write_gmmr_activation(pnode, val);
-			mmr = 1; /* should be 1 to broadcast to both sockets */
-			write_mmr_data_broadcast(pnode, mmr);
-		}
-	}
-
-	return 0;
-
-err_bau_disable:
-
-	for_each_possible_cpu(cur_cpu)
-		free_cpumask_var(per_cpu(uv_flush_tlb_mask, cur_cpu));
-
-	set_bau_off();
-	nobau_perm = 1;
-
-	return -EINVAL;
-}
-core_initcall(uv_bau_init);
-fs_initcall(uv_ptc_init);

From c4d98077443adf61268ffb8b2c5d63c6176d845f Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:18 -0500
Subject: [PATCH 239/262] x86/platform/uv: Remove SCIR MMR references for UV
 systems

UV class systems no longer use System Controller for monitoring of CPU
activity provided by this driver. Other methods have been developed for
BIOS and the management controller (BMC). Remove that supporting code.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-3-mike.travis@hpe.com
---
 arch/x86/include/asm/uv/uv_hub.h   | 43 ++--------------
 arch/x86/kernel/apic/x2apic_uv_x.c | 82 ------------------------------
 2 files changed, 3 insertions(+), 122 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 100d66806503a4..b21228db75bfe2 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -129,17 +129,6 @@
  */
 #define UV_MAX_NASID_VALUE	(UV_MAX_NUMALINK_BLADES * 2)
 
-/* System Controller Interface Reg info */
-struct uv_scir_s {
-	struct timer_list timer;
-	unsigned long	offset;
-	unsigned long	last;
-	unsigned long	idle_on;
-	unsigned long	idle_off;
-	unsigned char	state;
-	unsigned char	enabled;
-};
-
 /* GAM (globally addressed memory) range table */
 struct uv_gam_range_s {
 	u32	limit;		/* PA bits 56:26 (GAM_RANGE_SHFT) */
@@ -191,16 +180,13 @@ struct uv_hub_info_s {
 struct uv_cpu_info_s {
 	void			*p_uv_hub_info;
 	unsigned char		blade_cpu_id;
-	struct uv_scir_s	scir;
+	void			*reserved;
 };
 DECLARE_PER_CPU(struct uv_cpu_info_s, __uv_cpu_info);
 
 #define uv_cpu_info		this_cpu_ptr(&__uv_cpu_info)
 #define uv_cpu_info_per(cpu)	(&per_cpu(__uv_cpu_info, cpu))
 
-#define	uv_scir_info		(&uv_cpu_info->scir)
-#define	uv_cpu_scir_info(cpu)	(&uv_cpu_info_per(cpu)->scir)
-
 /* Node specific hub common info struct */
 extern void **__uv_hub_info_list;
 static inline struct uv_hub_info_s *uv_hub_info_list(int node)
@@ -297,9 +283,9 @@ union uvh_apicid {
 #define UV3_GLOBAL_MMR32_SIZE		(32UL * 1024 * 1024)
 
 #define UV4_LOCAL_MMR_BASE		0xfa000000UL
-#define UV4_GLOBAL_MMR32_BASE		0xfc000000UL
+#define UV4_GLOBAL_MMR32_BASE		0
 #define UV4_LOCAL_MMR_SIZE		(32UL * 1024 * 1024)
-#define UV4_GLOBAL_MMR32_SIZE		(16UL * 1024 * 1024)
+#define UV4_GLOBAL_MMR32_SIZE		0
 
 #define UV_LOCAL_MMR_BASE		(				\
 					is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
@@ -772,29 +758,6 @@ DECLARE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
 #define	UV_NMI_STATE_DUMP		2
 #define	UV_NMI_STATE_DUMP_DONE		3
 
-/* Update SCIR state */
-static inline void uv_set_scir_bits(unsigned char value)
-{
-	if (uv_scir_info->state != value) {
-		uv_scir_info->state = value;
-		uv_write_local_mmr8(uv_scir_info->offset, value);
-	}
-}
-
-static inline unsigned long uv_scir_offset(int apicid)
-{
-	return SCIR_LOCAL_MMR_BASE | (apicid & 0x3f);
-}
-
-static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
-{
-	if (uv_cpu_scir_info(cpu)->state != value) {
-		uv_write_global_mmr8(uv_cpu_to_pnode(cpu),
-				uv_cpu_scir_info(cpu)->offset, value);
-		uv_cpu_scir_info(cpu)->state = value;
-	}
-}
-
 /*
  * Get the minimum revision number of the hub chips within the partition.
  * (See UVx_HUB_REVISION_BASE above for specific values.)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 0b6eea3f54e6d7..f51fabf5601034 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -909,85 +909,6 @@ static __init void uv_rtc_init(void)
 	}
 }
 
-/*
- * percpu heartbeat timer
- */
-static void uv_heartbeat(struct timer_list *timer)
-{
-	unsigned char bits = uv_scir_info->state;
-
-	/* Flip heartbeat bit: */
-	bits ^= SCIR_CPU_HEARTBEAT;
-
-	/* Is this CPU idle? */
-	if (idle_cpu(raw_smp_processor_id()))
-		bits &= ~SCIR_CPU_ACTIVITY;
-	else
-		bits |= SCIR_CPU_ACTIVITY;
-
-	/* Update system controller interface reg: */
-	uv_set_scir_bits(bits);
-
-	/* Enable next timer period: */
-	mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
-}
-
-static int uv_heartbeat_enable(unsigned int cpu)
-{
-	while (!uv_cpu_scir_info(cpu)->enabled) {
-		struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
-
-		uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
-		timer_setup(timer, uv_heartbeat, TIMER_PINNED);
-		timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
-		add_timer_on(timer, cpu);
-		uv_cpu_scir_info(cpu)->enabled = 1;
-
-		/* Also ensure that boot CPU is enabled: */
-		cpu = 0;
-	}
-	return 0;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static int uv_heartbeat_disable(unsigned int cpu)
-{
-	if (uv_cpu_scir_info(cpu)->enabled) {
-		uv_cpu_scir_info(cpu)->enabled = 0;
-		del_timer(&uv_cpu_scir_info(cpu)->timer);
-	}
-	uv_set_cpu_scir_bits(cpu, 0xff);
-	return 0;
-}
-
-static __init void uv_scir_register_cpu_notifier(void)
-{
-	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/x2apic-uvx:online",
-				  uv_heartbeat_enable, uv_heartbeat_disable);
-}
-
-#else /* !CONFIG_HOTPLUG_CPU */
-
-static __init void uv_scir_register_cpu_notifier(void)
-{
-}
-
-static __init int uv_init_heartbeat(void)
-{
-	int cpu;
-
-	if (is_uv_system()) {
-		for_each_online_cpu(cpu)
-			uv_heartbeat_enable(cpu);
-	}
-
-	return 0;
-}
-
-late_initcall(uv_init_heartbeat);
-
-#endif /* !CONFIG_HOTPLUG_CPU */
-
 /* Direct Legacy VGA I/O traffic to designated IOH */
 static int uv_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags)
 {
@@ -1517,8 +1438,6 @@ static void __init uv_system_init_hub(void)
 			uv_hub_info_list(numa_node_id)->pnode = pnode;
 		else if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
 			uv_cpu_hub_info(cpu)->pnode = pnode;
-
-		uv_cpu_scir_info(cpu)->offset = uv_scir_offset(apicid);
 	}
 
 	for_each_node(nodeid) {
@@ -1547,7 +1466,6 @@ static void __init uv_system_init_hub(void)
 
 	uv_nmi_setup();
 	uv_cpu_init();
-	uv_scir_register_cpu_notifier();
 	uv_setup_proc_files(0);
 
 	/* Register Legacy VGA I/O redirection handler: */

From 788b66e34e8ab82a93c63a83ba5a9d04f2f4ae26 Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Tue, 6 Oct 2020 16:34:27 -0500
Subject: [PATCH 240/262] drivers/misc/sgi-xp: Adjust references in UV kernel
 modules

Remove the define is_uv() is_uv_system and just use the latter as is.
This removes a conflict with a new symbol in the generated uv_mmrs.h
file (is_uv()).

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-4-mike.travis@hpe.com
---
 drivers/misc/sgi-xp/xp.h            | 7 +------
 drivers/misc/sgi-xp/xp_main.c       | 4 ++--
 drivers/misc/sgi-xp/xp_uv.c         | 6 ++++--
 drivers/misc/sgi-xp/xpc_main.c      | 6 +++---
 drivers/misc/sgi-xp/xpc_partition.c | 2 +-
 drivers/misc/sgi-xp/xpnet.c         | 2 +-
 6 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 06469b12aced94..2b6aabc61c6a95 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -17,11 +17,6 @@
 
 #if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV
 #include <asm/uv/uv.h>
-#define is_uv()		is_uv_system()
-#endif
-
-#ifndef is_uv
-#define is_uv()		0
 #endif
 
 #ifdef USE_DBUG_ON
@@ -79,7 +74,7 @@
 
 #define XPC_MSG_SIZE(_payload_size) \
 				ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \
-				      is_uv() ? 64 : 128)
+				      is_uv_system() ? 64 : 128)
 
 
 /*
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 61b03fcefb134f..0eea2f518967db 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -233,7 +233,7 @@ xp_init(void)
 	for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++)
 		mutex_init(&xpc_registrations[ch_number].mutex);
 
-	if (is_uv())
+	if (is_uv_system())
 		ret = xp_init_uv();
 	else
 		ret = 0;
@@ -249,7 +249,7 @@ module_init(xp_init);
 static void __exit
 xp_exit(void)
 {
-	if (is_uv())
+	if (is_uv_system())
 		xp_exit_uv();
 }
 
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
index f15a9f2ac1ddc8..5dcca03b26cb39 100644
--- a/drivers/misc/sgi-xp/xp_uv.c
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -148,7 +148,9 @@ xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size)
 enum xp_retval
 xp_init_uv(void)
 {
-	BUG_ON(!is_uv());
+	WARN_ON(!is_uv_system());
+	if (!is_uv_system())
+		return xpUnsupported;
 
 	xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
 #ifdef CONFIG_X86
@@ -168,5 +170,5 @@ xp_init_uv(void)
 void
 xp_exit_uv(void)
 {
-	BUG_ON(!is_uv());
+	WARN_ON(!is_uv_system());
 }
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index 8a495dc82f167c..e3261e63d1f653 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -1043,7 +1043,7 @@ xpc_do_exit(enum xp_retval reason)
 
 	xpc_teardown_partitions();
 
-	if (is_uv())
+	if (is_uv_system())
 		xpc_exit_uv();
 }
 
@@ -1226,7 +1226,7 @@ xpc_init(void)
 	dev_set_name(xpc_part, "part");
 	dev_set_name(xpc_chan, "chan");
 
-	if (is_uv()) {
+	if (is_uv_system()) {
 		ret = xpc_init_uv();
 
 	} else {
@@ -1312,7 +1312,7 @@ xpc_init(void)
 
 	xpc_teardown_partitions();
 out_1:
-	if (is_uv())
+	if (is_uv_system())
 		xpc_exit_uv();
 	return ret;
 }
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index 099a53bdbb7d84..a47b3bd75c08f9 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -433,7 +433,7 @@ xpc_discovery(void)
 	 */
 	region_size = xp_region_size;
 
-	if (is_uv())
+	if (is_uv_system())
 		max_regions = 256;
 	else {
 		max_regions = 64;
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 837d6c3fe69cd8..8ee39910b30942 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -515,7 +515,7 @@ xpnet_init(void)
 {
 	int result;
 
-	if (!is_uv())
+	if (!is_uv_system())
 		return -ENODEV;
 
 	dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);

From 647128f1536efacca7bedf189790d24b22f03cca Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:20 -0500
Subject: [PATCH 241/262] x86/platform/uv: Update UV MMRs for UV5

Update UV MMRs in uv_mmrs.h for UV5 based on Verilog output from the
UV Hub hardware design files.  This is the next UV architecture with
a new class (UVY) being defined for 52 bit physical address masks.
Uses a bitmask for UV arch identification so a single test can cover
multiple versions.  Includes other adjustments to match the uv_mmrs.h
file to keep from encountering compile errors.  New UV5 functionality
is added in the patches that follow.

[ Fix W=1 build warnings. ]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-5-mike.travis@hpe.com
---
 arch/x86/include/asm/uv/uv_hub.h   |   61 +-
 arch/x86/include/asm/uv/uv_mmrs.h  | 7645 +++++++++++++++-------------
 arch/x86/kernel/apic/x2apic_uv_x.c |  267 +-
 arch/x86/platform/uv/uv_time.c     |   10 +-
 drivers/misc/sgi-gru/grufile.c     |    2 +-
 5 files changed, 4278 insertions(+), 3707 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index b21228db75bfe2..76969be0966021 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -144,6 +144,8 @@ struct uv_gam_range_s {
  * available in the L3 cache on the cpu socket for the node.
  */
 struct uv_hub_info_s {
+	unsigned int		hub_type;
+	unsigned char		hub_revision;
 	unsigned long		global_mmr_base;
 	unsigned long		global_mmr_shift;
 	unsigned long		gpa_mask;
@@ -156,7 +158,6 @@ struct uv_hub_info_s {
 	unsigned char		m_val;
 	unsigned char		n_val;
 	unsigned char		gr_table_len;
-	unsigned char		hub_revision;
 	unsigned char		apic_pnode_shift;
 	unsigned char		gpa_shift;
 	unsigned char		m_shift;
@@ -205,6 +206,17 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
 	return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info;
 }
 
+static inline int uv_hub_type(void)
+{
+	return uv_hub_info->hub_type;
+}
+
+static inline __init void uv_hub_type_set(int uvmask)
+{
+	uv_hub_info->hub_type = uvmask;
+}
+
+
 /*
  * HUB revision ranges for each UV HUB architecture.
  * This is a software convention - NOT the hardware revision numbers in
@@ -215,38 +227,29 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu)
 #define UV4_HUB_REVISION_BASE		7
 #define UV4A_HUB_REVISION_BASE		8	/* UV4 (fixed) rev 2 */
 
-static inline int is_uv2_hub(void)
-{
-	return is_uv_hubbed(uv(2));
-}
-
-static inline int is_uv3_hub(void)
-{
-	return is_uv_hubbed(uv(3));
-}
+static inline int is_uv(int uvmask) { return uv_hub_type() & uvmask; }
+static inline int is_uv1_hub(void) { return 0; }
+static inline int is_uv2_hub(void) { return is_uv(UV2); }
+static inline int is_uv3_hub(void) { return is_uv(UV3); }
+static inline int is_uv4a_hub(void) { return is_uv(UV4A); }
+static inline int is_uv4_hub(void) { return is_uv(UV4); }
+static inline int is_uv5_hub(void) { return 0; }
 
-/* First test "is UV4A", then "is UV4" */
-static inline int is_uv4a_hub(void)
-{
-	if (is_uv_hubbed(uv(4)))
-		return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE);
-	return 0;
-}
+/*
+ * UV4A is a revision of UV4.  So on UV4A, both is_uv4_hub() and
+ * is_uv4a_hub() return true, While on UV4, only is_uv4_hub()
+ * returns true.  So to get true results, first test if is UV4A,
+ * then test if is UV4.
+ */
 
-static inline int is_uv4_hub(void)
-{
-	return is_uv_hubbed(uv(4));
-}
+/* UVX class: UV2,3,4 */
+static inline int is_uvx_hub(void) { return is_uv(UVX); }
 
-static inline int is_uvx_hub(void)
-{
-	return (is_uv_hubbed(-2) >= uv(2));
-}
+/* UVY class: UV5,..? */
+static inline int is_uvy_hub(void) { return 0; }
 
-static inline int is_uv_hub(void)
-{
-	return is_uvx_hub();
-}
+/* Any UV Hubbed System */
+static inline int is_uv_hub(void) { return is_uv(UV_ANY); }
 
 union uvh_apicid {
     unsigned long       v;
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 775bf143a072c8..06ea2d1aaa3e44 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * SGI UV MMR definitions
+ * HPE UV MMR definitions
  *
  * Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved.
  */
@@ -18,42 +18,43 @@
  * grouped by architecture types.
  *
  * UVH  - definitions common to all UV hub types.
- * UVXH - definitions common to all UV eXtended hub types (currently 2, 3, 4).
- * UV2H - definitions specific to UV type 2 hub.
- * UV3H - definitions specific to UV type 3 hub.
+ * UVXH - definitions common to UVX class (2, 3, 4).
+ * UVYH - definitions common to UVY class (5).
+ * UV5H - definitions specific to UV type 5 hub.
+ * UV4AH - definitions specific to UV type 4A hub.
  * UV4H - definitions specific to UV type 4 hub.
- *
- * So in general, MMR addresses and structures are identical on all hubs types.
- * These MMRs are identified as:
- *	#define UVH_xxx		<address>
- *	union uvh_xxx {
- *		unsigned long       v;
- *		struct uvh_int_cmpd_s {
- *		} s;
- *	};
+ * UV3H - definitions specific to UV type 3 hub.
+ * UV2H - definitions specific to UV type 2 hub.
  *
  * If the MMR exists on all hub types but have different addresses,
- * use a conditional operator to define the value at runtime.
- *	#define UV2Hxxx	b
- *	#define UV3Hxxx	c
- *	#define UV4Hxxx	d
- *	#define UV4AHxxx e
- *	#define UVHxxx	(is_uv2_hub() ? UV2Hxxx :
- *			(is_uv3_hub() ? UV3Hxxx :
- *			(is_uv4a_hub() ? UV4AHxxx :
- *					UV4Hxxx))
+ * use a conditional operator to define the value at runtime.  Any
+ * that are not defined are blank.
+ *	(UV4A variations only generated if different from uv4)
+ *	#define UVHxxx (
+ *		is_uv(UV5) ? UV5Hxxx value :
+ *		is_uv(UV4A) ? UV4AHxxx value :
+ *		is_uv(UV4) ? UV4Hxxx value :
+ *		is_uv(UV3) ? UV3Hxxx value :
+ *		is_uv(UV2) ? UV2Hxxx value :
+ *		<ucv> or <undef value>)
+ *
+ * Class UVX has UVs (2|3|4|4A).
+ * Class UVY has UVs (5).
  *
  *	union uvh_xxx {
  *		unsigned long       v;
  *		struct uvh_xxx_s {	 # Common fields only
  *		} s;
- *		struct uv2h_xxx_s {	 # Full UV2 definition (*)
- *		} s2;
- *		struct uv3h_xxx_s {	 # Full UV3 definition (*)
- *		} s3;
- *		(NOTE: No struct uv4ah_xxx_s members exist)
+ *		struct uv5h_xxx_s {	 # Full UV5 definition (*)
+ *		} s5;
+ *		struct uv4ah_xxx_s {	 # Full UV4A definition (*)
+ *		} s4a;
  *		struct uv4h_xxx_s {	 # Full UV4 definition (*)
  *		} s4;
+ *		struct uv3h_xxx_s {	 # Full UV3 definition (*)
+ *		} s3;
+ *		struct uv2h_xxx_s {	 # Full UV2 definition (*)
+ *		} s2;
  *	};
  *		(* - if present and different than the common struct)
  *
@@ -62,496 +63,702 @@
  * if the contents is the same for all hubs, only the "s" structure is
  * generated.
  *
- * If the MMR exists on ONLY 1 type of hub, no generic definition is
- * generated:
- *	#define UVnH_xxx	<uvn address>
- *	union uvnh_xxx {
- *		unsigned long       v;
- *		struct uvh_int_cmpd_s {
- *		} sn;
- *	};
- *
- * (GEN Flags: mflags_opt= undefs=function UV234=UVXH)
+ * (GEN Flags: undefs=function)
  */
 
+ /* UV bit masks */
+#define	UV2	(1 << 0)
+#define	UV3	(1 << 1)
+#define	UV4	(1 << 2)
+#define	UV4A	(1 << 3)
+#define	UV5	(1 << 4)
+#define	UVX	(UV2|UV3|UV4)
+#define	UVY	(UV5)
+#define	UV_ANY	(~0)
+
+
+
+
 #define UV_MMR_ENABLE		(1UL << 63)
 
+#define UV1_HUB_PART_NUMBER	0x88a5
 #define UV2_HUB_PART_NUMBER	0x8eb8
 #define UV2_HUB_PART_NUMBER_X	0x1111
 #define UV3_HUB_PART_NUMBER	0x9578
 #define UV3_HUB_PART_NUMBER_X	0x4321
 #define UV4_HUB_PART_NUMBER	0x99a1
+#define UV5_HUB_PART_NUMBER	0xa171
 
 /* Error function to catch undefined references */
 extern unsigned long uv_undefined(char *str);
 
-/* ========================================================================= */
-/*                          UVH_BAU_DATA_BROADCAST                           */
-/* ========================================================================= */
-#define UVH_BAU_DATA_BROADCAST 0x61688UL
-
-#define UV2H_BAU_DATA_BROADCAST_32 0x440
-#define UV3H_BAU_DATA_BROADCAST_32 0x440
-#define UV4H_BAU_DATA_BROADCAST_32 0x360
-#define UVH_BAU_DATA_BROADCAST_32 (					\
-	is_uv2_hub() ? UV2H_BAU_DATA_BROADCAST_32 :			\
-	is_uv3_hub() ? UV3H_BAU_DATA_BROADCAST_32 :			\
-	/*is_uv4_hub*/ UV4H_BAU_DATA_BROADCAST_32)
-
-#define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT		0
-#define UVH_BAU_DATA_BROADCAST_ENABLE_MASK		0x0000000000000001UL
-
-
-union uvh_bau_data_broadcast_u {
-	unsigned long	v;
-	struct uvh_bau_data_broadcast_s {
-		unsigned long	enable:1;			/* RW */
-		unsigned long	rsvd_1_63:63;
-	} s;
-};
-
-/* ========================================================================= */
-/*                           UVH_BAU_DATA_CONFIG                             */
-/* ========================================================================= */
-#define UVH_BAU_DATA_CONFIG 0x61680UL
-
-#define UV2H_BAU_DATA_CONFIG_32 0x438
-#define UV3H_BAU_DATA_CONFIG_32 0x438
-#define UV4H_BAU_DATA_CONFIG_32 0x358
-#define UVH_BAU_DATA_CONFIG_32 (					\
-	is_uv2_hub() ? UV2H_BAU_DATA_CONFIG_32 :			\
-	is_uv3_hub() ? UV3H_BAU_DATA_CONFIG_32 :			\
-	/*is_uv4_hub*/ UV4H_BAU_DATA_CONFIG_32)
-
-#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT			0
-#define UVH_BAU_DATA_CONFIG_DM_SHFT			8
-#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT		11
-#define UVH_BAU_DATA_CONFIG_STATUS_SHFT			12
-#define UVH_BAU_DATA_CONFIG_P_SHFT			13
-#define UVH_BAU_DATA_CONFIG_T_SHFT			15
-#define UVH_BAU_DATA_CONFIG_M_SHFT			16
-#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT		32
-#define UVH_BAU_DATA_CONFIG_VECTOR_MASK			0x00000000000000ffUL
-#define UVH_BAU_DATA_CONFIG_DM_MASK			0x0000000000000700UL
-#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK		0x0000000000000800UL
-#define UVH_BAU_DATA_CONFIG_STATUS_MASK			0x0000000000001000UL
-#define UVH_BAU_DATA_CONFIG_P_MASK			0x0000000000002000UL
-#define UVH_BAU_DATA_CONFIG_T_MASK			0x0000000000008000UL
-#define UVH_BAU_DATA_CONFIG_M_MASK			0x0000000000010000UL
-#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
-
-
-union uvh_bau_data_config_u {
-	unsigned long	v;
-	struct uvh_bau_data_config_s {
-		unsigned long	vector_:8;			/* RW */
-		unsigned long	dm:3;				/* RW */
-		unsigned long	destmode:1;			/* RW */
-		unsigned long	status:1;			/* RO */
-		unsigned long	p:1;				/* RO */
-		unsigned long	rsvd_14:1;
-		unsigned long	t:1;				/* RO */
-		unsigned long	m:1;				/* RW */
-		unsigned long	rsvd_17_31:15;
-		unsigned long	apic_id:32;			/* RW */
-	} s;
-};
-
 /* ========================================================================= */
 /*                           UVH_EVENT_OCCURRED0                             */
 /* ========================================================================= */
 #define UVH_EVENT_OCCURRED0 0x70000UL
-#define UVH_EVENT_OCCURRED0_32 0x5e8
 
+/* UVH common defines*/
 #define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT		0
-#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT		11
 #define UVH_EVENT_OCCURRED0_LB_HCERR_MASK		0x0000000000000001UL
-#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK		0x0000000000000800UL
 
+/* UVXH common defines */
 #define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT		2
-#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT		3
-#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT		4
-#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT		5
-#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT		6
-#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT		7
-#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT		8
-#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT		9
-#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT		12
-#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT		13
-#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT		14
-#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT		15
-#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT		16
 #define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK		0x0000000000000004UL
+#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT		3
 #define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK		0x0000000000000008UL
+#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT		4
 #define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK		0x0000000000000010UL
+#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT		5
 #define UVXH_EVENT_OCCURRED0_GR0_HCERR_MASK		0x0000000000000020UL
+#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT		6
 #define UVXH_EVENT_OCCURRED0_GR1_HCERR_MASK		0x0000000000000040UL
+#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT		7
 #define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK		0x0000000000000080UL
+#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT		8
 #define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK		0x0000000000000100UL
+#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT		9
 #define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK		0x0000000000000200UL
+#define UVXH_EVENT_OCCURRED0_RH_AOERR0_SHFT		11
+#define UVXH_EVENT_OCCURRED0_RH_AOERR0_MASK		0x0000000000000800UL
+#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT		12
 #define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK		0x0000000000001000UL
+#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT		13
 #define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK		0x0000000000002000UL
+#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT		14
 #define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK		0x0000000000004000UL
+#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT		15
 #define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK		0x0000000000008000UL
+#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT		16
 #define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK		0x0000000000010000UL
 
-#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT		1
-#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT		10
-#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT		17
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT		18
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT		19
-#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT		20
-#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT		21
-#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT		22
-#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT		23
-#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT		24
-#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT		25
-#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT		26
-#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT		27
-#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT		28
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT		29
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT		30
-#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	31
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		32
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		33
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		34
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		35
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		36
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		37
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		38
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		39
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		40
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		41
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		42
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		43
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		44
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		45
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		46
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		47
-#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT		48
-#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT		49
-#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		50
-#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		51
-#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	52
-#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT		53
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT		54
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT		55
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT		56
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT		57
-#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT		58
-#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK		0x0000000000000002UL
-#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK		0x0000000000000400UL
-#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK		0x0000000000020000UL
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK		0x0000000000040000UL
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK		0x0000000000080000UL
-#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK		0x0000000000100000UL
-#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK		0x0000000000200000UL
-#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK		0x0000000000400000UL
-#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK		0x0000000000800000UL
-#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK		0x0000000001000000UL
-#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK		0x0000000002000000UL
-#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK		0x0000000004000000UL
-#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK		0x0000000008000000UL
-#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK		0x0000000010000000UL
-#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK		0x0000000020000000UL
-#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK		0x0000000040000000UL
-#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK	0x0000000080000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK		0x0000000100000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK		0x0000000200000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK		0x0000000400000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK		0x0000000800000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK		0x0000001000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK		0x0000002000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK		0x0000004000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK		0x0000008000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK		0x0000010000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK		0x0000020000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK		0x0000040000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK		0x0000080000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK		0x0000100000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK		0x0000200000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK		0x0000400000000000UL
-#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK		0x0000800000000000UL
-#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK		0x0001000000000000UL
-#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK		0x0002000000000000UL
-#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK		0x0004000000000000UL
-#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK		0x0008000000000000UL
-#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK	0x0010000000000000UL
-#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK		0x0020000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK		0x0040000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK		0x0080000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK		0x0100000000000000UL
-#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK		0x0200000000000000UL
-#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK		0x0400000000000000UL
-
-#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT		1
-#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT		10
-#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT		17
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT		18
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT		19
-#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT		20
-#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT		21
-#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT		22
-#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT		23
-#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT		24
-#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT		25
-#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT		26
-#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT		27
-#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT		28
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT		29
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT		30
-#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	31
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		32
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		33
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		34
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		35
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		36
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		37
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		38
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		39
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		40
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		41
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		42
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		43
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		44
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		45
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		46
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		47
-#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT		48
-#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT		49
-#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		50
-#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		51
-#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	52
-#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT		53
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT		54
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT		55
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT		56
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT		57
-#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT		58
-#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK		0x0000000000000002UL
-#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK		0x0000000000000400UL
-#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK		0x0000000000020000UL
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK		0x0000000000040000UL
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK		0x0000000000080000UL
-#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK		0x0000000000100000UL
-#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK		0x0000000000200000UL
-#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK		0x0000000000400000UL
-#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK		0x0000000000800000UL
-#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK		0x0000000001000000UL
-#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK		0x0000000002000000UL
-#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK		0x0000000004000000UL
-#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK		0x0000000008000000UL
-#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK		0x0000000010000000UL
-#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK		0x0000000020000000UL
-#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK		0x0000000040000000UL
-#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK	0x0000000080000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK		0x0000000100000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK		0x0000000200000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK		0x0000000400000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK		0x0000000800000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK		0x0000001000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK		0x0000002000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK		0x0000004000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK		0x0000008000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK		0x0000010000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK		0x0000020000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK		0x0000040000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK		0x0000080000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK		0x0000100000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK		0x0000200000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK		0x0000400000000000UL
-#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK		0x0000800000000000UL
-#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK		0x0001000000000000UL
-#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK		0x0002000000000000UL
-#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK		0x0004000000000000UL
-#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK		0x0008000000000000UL
-#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK	0x0010000000000000UL
-#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK		0x0020000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK		0x0040000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK		0x0080000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK		0x0100000000000000UL
-#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK		0x0200000000000000UL
-#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK		0x0400000000000000UL
-
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED0_KT_HCERR_SHFT		1
+#define UVYH_EVENT_OCCURRED0_KT_HCERR_MASK		0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED0_RH0_HCERR_SHFT		2
+#define UVYH_EVENT_OCCURRED0_RH0_HCERR_MASK		0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED0_RH1_HCERR_SHFT		3
+#define UVYH_EVENT_OCCURRED0_RH1_HCERR_MASK		0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED0_LH0_HCERR_SHFT		4
+#define UVYH_EVENT_OCCURRED0_LH0_HCERR_MASK		0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED0_LH1_HCERR_SHFT		5
+#define UVYH_EVENT_OCCURRED0_LH1_HCERR_MASK		0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED0_LH2_HCERR_SHFT		6
+#define UVYH_EVENT_OCCURRED0_LH2_HCERR_MASK		0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED0_LH3_HCERR_SHFT		7
+#define UVYH_EVENT_OCCURRED0_LH3_HCERR_MASK		0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED0_XB_HCERR_SHFT		8
+#define UVYH_EVENT_OCCURRED0_XB_HCERR_MASK		0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED0_RDM_HCERR_SHFT		9
+#define UVYH_EVENT_OCCURRED0_RDM_HCERR_MASK		0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED0_NI0_HCERR_SHFT		10
+#define UVYH_EVENT_OCCURRED0_NI0_HCERR_MASK		0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED0_NI1_HCERR_SHFT		11
+#define UVYH_EVENT_OCCURRED0_NI1_HCERR_MASK		0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED0_LB_AOERR0_SHFT		12
+#define UVYH_EVENT_OCCURRED0_LB_AOERR0_MASK		0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED0_KT_AOERR0_SHFT		13
+#define UVYH_EVENT_OCCURRED0_KT_AOERR0_MASK		0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_SHFT		14
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR0_MASK		0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_SHFT		15
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR0_MASK		0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_SHFT		16
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR0_MASK		0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_SHFT		17
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR0_MASK		0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_SHFT		18
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR0_MASK		0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_SHFT		19
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR0_MASK		0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED0_XB_AOERR0_SHFT		20
+#define UVYH_EVENT_OCCURRED0_XB_AOERR0_MASK		0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_SHFT		21
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR0_MASK		0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_SHFT		22
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR0_MASK		0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_SHFT		23
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR0_MASK		0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_SHFT		24
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR0_MASK		0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_SHFT		25
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR0_MASK		0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED0_LB_AOERR1_SHFT		26
+#define UVYH_EVENT_OCCURRED0_LB_AOERR1_MASK		0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED0_KT_AOERR1_SHFT		27
+#define UVYH_EVENT_OCCURRED0_KT_AOERR1_MASK		0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_SHFT		28
+#define UVYH_EVENT_OCCURRED0_RH0_AOERR1_MASK		0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_SHFT		29
+#define UVYH_EVENT_OCCURRED0_RH1_AOERR1_MASK		0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_SHFT		30
+#define UVYH_EVENT_OCCURRED0_LH0_AOERR1_MASK		0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_SHFT		31
+#define UVYH_EVENT_OCCURRED0_LH1_AOERR1_MASK		0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_SHFT		32
+#define UVYH_EVENT_OCCURRED0_LH2_AOERR1_MASK		0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_SHFT		33
+#define UVYH_EVENT_OCCURRED0_LH3_AOERR1_MASK		0x0000000200000000UL
+#define UVYH_EVENT_OCCURRED0_XB_AOERR1_SHFT		34
+#define UVYH_EVENT_OCCURRED0_XB_AOERR1_MASK		0x0000000400000000UL
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_SHFT		35
+#define UVYH_EVENT_OCCURRED0_RDM_AOERR1_MASK		0x0000000800000000UL
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR1_SHFT		36
+#define UVYH_EVENT_OCCURRED0_RT0_AOERR1_MASK		0x0000001000000000UL
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_SHFT		37
+#define UVYH_EVENT_OCCURRED0_RT1_AOERR1_MASK		0x0000002000000000UL
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_SHFT		38
+#define UVYH_EVENT_OCCURRED0_NI0_AOERR1_MASK		0x0000004000000000UL
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_SHFT		39
+#define UVYH_EVENT_OCCURRED0_NI1_AOERR1_MASK		0x0000008000000000UL
+#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	40
+#define UVYH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK	0x0000010000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		41
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK		0x0000020000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		42
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK		0x0000040000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		43
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK		0x0000080000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		44
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK		0x0000100000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		45
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK		0x0000200000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		46
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK		0x0000400000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		47
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK		0x0000800000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		48
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK		0x0001000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		49
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK		0x0002000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		50
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK		0x0004000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		51
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK		0x0008000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		52
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK		0x0010000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		53
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK		0x0020000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		54
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK		0x0040000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		55
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK		0x0080000000000000UL
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		56
+#define UVYH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK		0x0100000000000000UL
+#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_SHFT		57
+#define UVYH_EVENT_OCCURRED0_L1_NMI_INT_MASK		0x0200000000000000UL
+#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_SHFT		58
+#define UVYH_EVENT_OCCURRED0_STOP_CLOCK_MASK		0x0400000000000000UL
+#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		59
+#define UVYH_EVENT_OCCURRED0_ASIC_TO_L1_MASK		0x0800000000000000UL
+#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		60
+#define UVYH_EVENT_OCCURRED0_L1_TO_ASIC_MASK		0x1000000000000000UL
+#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	61
+#define UVYH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK	0x2000000000000000UL
+
+/* UV4 unique defines */
 #define UV4H_EVENT_OCCURRED0_KT_HCERR_SHFT		1
+#define UV4H_EVENT_OCCURRED0_KT_HCERR_MASK		0x0000000000000002UL
 #define UV4H_EVENT_OCCURRED0_KT_AOERR0_SHFT		10
-#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT		17
-#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT		18
-#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT		19
-#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT		20
-#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT		21
-#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT		22
-#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT		23
-#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT		24
-#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT		25
-#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT		26
-#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT		27
-#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT		28
-#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT		29
-#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT		30
-#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT		31
-#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT		32
-#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT		33
-#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT		34
-#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT		35
-#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT		36
-#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	37
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		38
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		39
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		40
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		41
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		42
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		43
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		44
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		45
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		46
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		47
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		48
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		49
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		50
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		51
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		52
-#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		53
-#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT		54
-#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT		55
-#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		56
-#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		57
-#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	58
-#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT		59
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT		60
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT		61
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT		62
-#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT		63
-#define UV4H_EVENT_OCCURRED0_KT_HCERR_MASK		0x0000000000000002UL
 #define UV4H_EVENT_OCCURRED0_KT_AOERR0_MASK		0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_SHFT		17
 #define UV4H_EVENT_OCCURRED0_RTQ0_AOERR0_MASK		0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_SHFT		18
 #define UV4H_EVENT_OCCURRED0_RTQ1_AOERR0_MASK		0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_SHFT		19
 #define UV4H_EVENT_OCCURRED0_RTQ2_AOERR0_MASK		0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_SHFT		20
 #define UV4H_EVENT_OCCURRED0_RTQ3_AOERR0_MASK		0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR0_SHFT		21
 #define UV4H_EVENT_OCCURRED0_NI0_AOERR0_MASK		0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR0_SHFT		22
 #define UV4H_EVENT_OCCURRED0_NI1_AOERR0_MASK		0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED0_LB_AOERR1_SHFT		23
 #define UV4H_EVENT_OCCURRED0_LB_AOERR1_MASK		0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED0_KT_AOERR1_SHFT		24
 #define UV4H_EVENT_OCCURRED0_KT_AOERR1_MASK		0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED0_RH_AOERR1_SHFT		25
 #define UV4H_EVENT_OCCURRED0_RH_AOERR1_MASK		0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED0_LH0_AOERR1_SHFT		26
 #define UV4H_EVENT_OCCURRED0_LH0_AOERR1_MASK		0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED0_LH1_AOERR1_SHFT		27
 #define UV4H_EVENT_OCCURRED0_LH1_AOERR1_MASK		0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED0_GR0_AOERR1_SHFT		28
 #define UV4H_EVENT_OCCURRED0_GR0_AOERR1_MASK		0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED0_GR1_AOERR1_SHFT		29
 #define UV4H_EVENT_OCCURRED0_GR1_AOERR1_MASK		0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED0_XB_AOERR1_SHFT		30
 #define UV4H_EVENT_OCCURRED0_XB_AOERR1_MASK		0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_SHFT		31
 #define UV4H_EVENT_OCCURRED0_RTQ0_AOERR1_MASK		0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_SHFT		32
 #define UV4H_EVENT_OCCURRED0_RTQ1_AOERR1_MASK		0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_SHFT		33
 #define UV4H_EVENT_OCCURRED0_RTQ2_AOERR1_MASK		0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_SHFT		34
 #define UV4H_EVENT_OCCURRED0_RTQ3_AOERR1_MASK		0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED0_NI0_AOERR1_SHFT		35
 #define UV4H_EVENT_OCCURRED0_NI0_AOERR1_MASK		0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED0_NI1_AOERR1_SHFT		36
 #define UV4H_EVENT_OCCURRED0_NI1_AOERR1_MASK		0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	37
 #define UV4H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK	0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		38
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK		0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		39
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK		0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		40
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK		0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		41
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK		0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		42
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK		0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		43
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK		0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		44
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK		0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		45
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK		0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		46
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK		0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		47
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK		0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		48
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK		0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		49
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK		0x0002000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		50
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK		0x0004000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		51
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK		0x0008000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		52
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK		0x0010000000000000UL
+#define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		53
 #define UV4H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK		0x0020000000000000UL
+#define UV4H_EVENT_OCCURRED0_L1_NMI_INT_SHFT		54
 #define UV4H_EVENT_OCCURRED0_L1_NMI_INT_MASK		0x0040000000000000UL
+#define UV4H_EVENT_OCCURRED0_STOP_CLOCK_SHFT		55
 #define UV4H_EVENT_OCCURRED0_STOP_CLOCK_MASK		0x0080000000000000UL
+#define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		56
 #define UV4H_EVENT_OCCURRED0_ASIC_TO_L1_MASK		0x0100000000000000UL
+#define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		57
 #define UV4H_EVENT_OCCURRED0_L1_TO_ASIC_MASK		0x0200000000000000UL
+#define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	58
 #define UV4H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK	0x0400000000000000UL
+#define UV4H_EVENT_OCCURRED0_IPI_INT_SHFT		59
 #define UV4H_EVENT_OCCURRED0_IPI_INT_MASK		0x0800000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT		60
 #define UV4H_EVENT_OCCURRED0_EXTIO_INT0_MASK		0x1000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT1_SHFT		61
 #define UV4H_EVENT_OCCURRED0_EXTIO_INT1_MASK		0x2000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT2_SHFT		62
 #define UV4H_EVENT_OCCURRED0_EXTIO_INT2_MASK		0x4000000000000000UL
+#define UV4H_EVENT_OCCURRED0_EXTIO_INT3_SHFT		63
 #define UV4H_EVENT_OCCURRED0_EXTIO_INT3_MASK		0x8000000000000000UL
 
-#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT (				\
-	is_uv2_hub() ? UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :		\
-	is_uv3_hub() ? UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT :		\
-	/*is_uv4_hub*/ UV4H_EVENT_OCCURRED0_EXTIO_INT0_SHFT)
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT		1
+#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK		0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT		10
+#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK		0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT		17
+#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK		0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT		18
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK		0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT		19
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK		0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT		20
+#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK		0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT		21
+#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK		0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT		22
+#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK		0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT		23
+#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK		0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT		24
+#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK		0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT		25
+#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK		0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT		26
+#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK		0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT		27
+#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK		0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT		28
+#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK		0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT		29
+#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK		0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT		30
+#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK		0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	31
+#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK	0x0000000080000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		32
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK		0x0000000100000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		33
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK		0x0000000200000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		34
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK		0x0000000400000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		35
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK		0x0000000800000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		36
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK		0x0000001000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		37
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK		0x0000002000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		38
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK		0x0000004000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		39
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK		0x0000008000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		40
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK		0x0000010000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		41
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK		0x0000020000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		42
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK		0x0000040000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		43
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK		0x0000080000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		44
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK		0x0000100000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		45
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK		0x0000200000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		46
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK		0x0000400000000000UL
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		47
+#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK		0x0000800000000000UL
+#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT		48
+#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK		0x0001000000000000UL
+#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT		49
+#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK		0x0002000000000000UL
+#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		50
+#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK		0x0004000000000000UL
+#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		51
+#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK		0x0008000000000000UL
+#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	52
+#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK	0x0010000000000000UL
+#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT		53
+#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK		0x0020000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT		54
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK		0x0040000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT		55
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK		0x0080000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT		56
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK		0x0100000000000000UL
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT		57
+#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK		0x0200000000000000UL
+#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT		58
+#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK		0x0400000000000000UL
 
-union uvh_event_occurred0_u {
-	unsigned long	v;
-	struct uvh_event_occurred0_s {
-		unsigned long	lb_hcerr:1;			/* RW, W1C */
-		unsigned long	rsvd_1_10:10;
-		unsigned long	rh_aoerr0:1;			/* RW, W1C */
-		unsigned long	rsvd_12_63:52;
-	} s;
-	struct uvxh_event_occurred0_s {
-		unsigned long	lb_hcerr:1;			/* RW */
-		unsigned long	rsvd_1:1;
-		unsigned long	rh_hcerr:1;			/* RW */
-		unsigned long	lh0_hcerr:1;			/* RW */
-		unsigned long	lh1_hcerr:1;			/* RW */
-		unsigned long	gr0_hcerr:1;			/* RW */
-		unsigned long	gr1_hcerr:1;			/* RW */
-		unsigned long	ni0_hcerr:1;			/* RW */
-		unsigned long	ni1_hcerr:1;			/* RW */
-		unsigned long	lb_aoerr0:1;			/* RW */
-		unsigned long	rsvd_10:1;
-		unsigned long	rh_aoerr0:1;			/* RW */
-		unsigned long	lh0_aoerr0:1;			/* RW */
-		unsigned long	lh1_aoerr0:1;			/* RW */
-		unsigned long	gr0_aoerr0:1;			/* RW */
-		unsigned long	gr1_aoerr0:1;			/* RW */
-		unsigned long	xb_aoerr0:1;			/* RW */
-		unsigned long	rsvd_17_63:47;
-	} sx;
-	struct uv4h_event_occurred0_s {
-		unsigned long	lb_hcerr:1;			/* RW */
-		unsigned long	kt_hcerr:1;			/* RW */
-		unsigned long	rh_hcerr:1;			/* RW */
-		unsigned long	lh0_hcerr:1;			/* RW */
-		unsigned long	lh1_hcerr:1;			/* RW */
-		unsigned long	gr0_hcerr:1;			/* RW */
-		unsigned long	gr1_hcerr:1;			/* RW */
-		unsigned long	ni0_hcerr:1;			/* RW */
-		unsigned long	ni1_hcerr:1;			/* RW */
-		unsigned long	lb_aoerr0:1;			/* RW */
-		unsigned long	kt_aoerr0:1;			/* RW */
-		unsigned long	rh_aoerr0:1;			/* RW */
-		unsigned long	lh0_aoerr0:1;			/* RW */
-		unsigned long	lh1_aoerr0:1;			/* RW */
-		unsigned long	gr0_aoerr0:1;			/* RW */
-		unsigned long	gr1_aoerr0:1;			/* RW */
-		unsigned long	xb_aoerr0:1;			/* RW */
-		unsigned long	rtq0_aoerr0:1;			/* RW */
-		unsigned long	rtq1_aoerr0:1;			/* RW */
-		unsigned long	rtq2_aoerr0:1;			/* RW */
-		unsigned long	rtq3_aoerr0:1;			/* RW */
-		unsigned long	ni0_aoerr0:1;			/* RW */
-		unsigned long	ni1_aoerr0:1;			/* RW */
-		unsigned long	lb_aoerr1:1;			/* RW */
-		unsigned long	kt_aoerr1:1;			/* RW */
-		unsigned long	rh_aoerr1:1;			/* RW */
-		unsigned long	lh0_aoerr1:1;			/* RW */
-		unsigned long	lh1_aoerr1:1;			/* RW */
-		unsigned long	gr0_aoerr1:1;			/* RW */
-		unsigned long	gr1_aoerr1:1;			/* RW */
-		unsigned long	xb_aoerr1:1;			/* RW */
-		unsigned long	rtq0_aoerr1:1;			/* RW */
-		unsigned long	rtq1_aoerr1:1;			/* RW */
-		unsigned long	rtq2_aoerr1:1;			/* RW */
-		unsigned long	rtq3_aoerr1:1;			/* RW */
-		unsigned long	ni0_aoerr1:1;			/* RW */
-		unsigned long	ni1_aoerr1:1;			/* RW */
-		unsigned long	system_shutdown_int:1;		/* RW */
-		unsigned long	lb_irq_int_0:1;			/* RW */
-		unsigned long	lb_irq_int_1:1;			/* RW */
-		unsigned long	lb_irq_int_2:1;			/* RW */
-		unsigned long	lb_irq_int_3:1;			/* RW */
-		unsigned long	lb_irq_int_4:1;			/* RW */
-		unsigned long	lb_irq_int_5:1;			/* RW */
-		unsigned long	lb_irq_int_6:1;			/* RW */
-		unsigned long	lb_irq_int_7:1;			/* RW */
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT		1
+#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK		0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT		10
+#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK		0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT		17
+#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK		0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT		18
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK		0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT		19
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK		0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT		20
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK		0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT		21
+#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK		0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT		22
+#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK		0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT		23
+#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK		0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT		24
+#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK		0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT		25
+#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK		0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT		26
+#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK		0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT		27
+#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK		0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT		28
+#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK		0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT		29
+#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK		0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT		30
+#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK		0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT	31
+#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK	0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT		32
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK		0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT		33
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK		0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT		34
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK		0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT		35
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK		0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT		36
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK		0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT		37
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK		0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT		38
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK		0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT		39
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK		0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT		40
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK		0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT		41
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK		0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT		42
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK		0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT		43
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK		0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT		44
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK		0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT		45
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK		0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT		46
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK		0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT		47
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK		0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT		48
+#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK		0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT		49
+#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK		0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT		50
+#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK		0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT		51
+#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK		0x0008000000000000UL
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT	52
+#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK	0x0010000000000000UL
+#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT		53
+#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK		0x0020000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT		54
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK		0x0040000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT		55
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK		0x0080000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT		56
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK		0x0100000000000000UL
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT		57
+#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK		0x0200000000000000UL
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT		58
+#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK		0x0400000000000000UL
+
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK (				\
+	is_uv(UV4) ? 0x1000000000000000UL :				\
+	is_uv(UV3) ? 0x0040000000000000UL :				\
+	is_uv(UV2) ? 0x0040000000000000UL :				\
+	0)
+#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT (				\
+	is_uv(UV4) ? 60 :						\
+	is_uv(UV3) ? 54 :						\
+	is_uv(UV2) ? 54 :						\
+	-1)
+
+union uvh_event_occurred0_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_event_occurred0_s {
+		unsigned long	lb_hcerr:1;			/* RW */
+		unsigned long	rsvd_1_63:63;
+	} s;
+
+	/* UVXH common struct */
+	struct uvxh_event_occurred0_s {
+		unsigned long	lb_hcerr:1;			/* RW */
+		unsigned long	rsvd_1:1;
+		unsigned long	rh_hcerr:1;			/* RW */
+		unsigned long	lh0_hcerr:1;			/* RW */
+		unsigned long	lh1_hcerr:1;			/* RW */
+		unsigned long	gr0_hcerr:1;			/* RW */
+		unsigned long	gr1_hcerr:1;			/* RW */
+		unsigned long	ni0_hcerr:1;			/* RW */
+		unsigned long	ni1_hcerr:1;			/* RW */
+		unsigned long	lb_aoerr0:1;			/* RW */
+		unsigned long	rsvd_10:1;
+		unsigned long	rh_aoerr0:1;			/* RW */
+		unsigned long	lh0_aoerr0:1;			/* RW */
+		unsigned long	lh1_aoerr0:1;			/* RW */
+		unsigned long	gr0_aoerr0:1;			/* RW */
+		unsigned long	gr1_aoerr0:1;			/* RW */
+		unsigned long	xb_aoerr0:1;			/* RW */
+		unsigned long	rsvd_17_63:47;
+	} sx;
+
+	/* UVYH common struct */
+	struct uvyh_event_occurred0_s {
+		unsigned long	lb_hcerr:1;			/* RW */
+		unsigned long	kt_hcerr:1;			/* RW */
+		unsigned long	rh0_hcerr:1;			/* RW */
+		unsigned long	rh1_hcerr:1;			/* RW */
+		unsigned long	lh0_hcerr:1;			/* RW */
+		unsigned long	lh1_hcerr:1;			/* RW */
+		unsigned long	lh2_hcerr:1;			/* RW */
+		unsigned long	lh3_hcerr:1;			/* RW */
+		unsigned long	xb_hcerr:1;			/* RW */
+		unsigned long	rdm_hcerr:1;			/* RW */
+		unsigned long	ni0_hcerr:1;			/* RW */
+		unsigned long	ni1_hcerr:1;			/* RW */
+		unsigned long	lb_aoerr0:1;			/* RW */
+		unsigned long	kt_aoerr0:1;			/* RW */
+		unsigned long	rh0_aoerr0:1;			/* RW */
+		unsigned long	rh1_aoerr0:1;			/* RW */
+		unsigned long	lh0_aoerr0:1;			/* RW */
+		unsigned long	lh1_aoerr0:1;			/* RW */
+		unsigned long	lh2_aoerr0:1;			/* RW */
+		unsigned long	lh3_aoerr0:1;			/* RW */
+		unsigned long	xb_aoerr0:1;			/* RW */
+		unsigned long	rdm_aoerr0:1;			/* RW */
+		unsigned long	rt0_aoerr0:1;			/* RW */
+		unsigned long	rt1_aoerr0:1;			/* RW */
+		unsigned long	ni0_aoerr0:1;			/* RW */
+		unsigned long	ni1_aoerr0:1;			/* RW */
+		unsigned long	lb_aoerr1:1;			/* RW */
+		unsigned long	kt_aoerr1:1;			/* RW */
+		unsigned long	rh0_aoerr1:1;			/* RW */
+		unsigned long	rh1_aoerr1:1;			/* RW */
+		unsigned long	lh0_aoerr1:1;			/* RW */
+		unsigned long	lh1_aoerr1:1;			/* RW */
+		unsigned long	lh2_aoerr1:1;			/* RW */
+		unsigned long	lh3_aoerr1:1;			/* RW */
+		unsigned long	xb_aoerr1:1;			/* RW */
+		unsigned long	rdm_aoerr1:1;			/* RW */
+		unsigned long	rt0_aoerr1:1;			/* RW */
+		unsigned long	rt1_aoerr1:1;			/* RW */
+		unsigned long	ni0_aoerr1:1;			/* RW */
+		unsigned long	ni1_aoerr1:1;			/* RW */
+		unsigned long	system_shutdown_int:1;		/* RW */
+		unsigned long	lb_irq_int_0:1;			/* RW */
+		unsigned long	lb_irq_int_1:1;			/* RW */
+		unsigned long	lb_irq_int_2:1;			/* RW */
+		unsigned long	lb_irq_int_3:1;			/* RW */
+		unsigned long	lb_irq_int_4:1;			/* RW */
+		unsigned long	lb_irq_int_5:1;			/* RW */
+		unsigned long	lb_irq_int_6:1;			/* RW */
+		unsigned long	lb_irq_int_7:1;			/* RW */
+		unsigned long	lb_irq_int_8:1;			/* RW */
+		unsigned long	lb_irq_int_9:1;			/* RW */
+		unsigned long	lb_irq_int_10:1;		/* RW */
+		unsigned long	lb_irq_int_11:1;		/* RW */
+		unsigned long	lb_irq_int_12:1;		/* RW */
+		unsigned long	lb_irq_int_13:1;		/* RW */
+		unsigned long	lb_irq_int_14:1;		/* RW */
+		unsigned long	lb_irq_int_15:1;		/* RW */
+		unsigned long	l1_nmi_int:1;			/* RW */
+		unsigned long	stop_clock:1;			/* RW */
+		unsigned long	asic_to_l1:1;			/* RW */
+		unsigned long	l1_to_asic:1;			/* RW */
+		unsigned long	la_seq_trigger:1;		/* RW */
+		unsigned long	rsvd_62_63:2;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_event_occurred0_s {
+		unsigned long	lb_hcerr:1;			/* RW */
+		unsigned long	kt_hcerr:1;			/* RW */
+		unsigned long	rh0_hcerr:1;			/* RW */
+		unsigned long	rh1_hcerr:1;			/* RW */
+		unsigned long	lh0_hcerr:1;			/* RW */
+		unsigned long	lh1_hcerr:1;			/* RW */
+		unsigned long	lh2_hcerr:1;			/* RW */
+		unsigned long	lh3_hcerr:1;			/* RW */
+		unsigned long	xb_hcerr:1;			/* RW */
+		unsigned long	rdm_hcerr:1;			/* RW */
+		unsigned long	ni0_hcerr:1;			/* RW */
+		unsigned long	ni1_hcerr:1;			/* RW */
+		unsigned long	lb_aoerr0:1;			/* RW */
+		unsigned long	kt_aoerr0:1;			/* RW */
+		unsigned long	rh0_aoerr0:1;			/* RW */
+		unsigned long	rh1_aoerr0:1;			/* RW */
+		unsigned long	lh0_aoerr0:1;			/* RW */
+		unsigned long	lh1_aoerr0:1;			/* RW */
+		unsigned long	lh2_aoerr0:1;			/* RW */
+		unsigned long	lh3_aoerr0:1;			/* RW */
+		unsigned long	xb_aoerr0:1;			/* RW */
+		unsigned long	rdm_aoerr0:1;			/* RW */
+		unsigned long	rt0_aoerr0:1;			/* RW */
+		unsigned long	rt1_aoerr0:1;			/* RW */
+		unsigned long	ni0_aoerr0:1;			/* RW */
+		unsigned long	ni1_aoerr0:1;			/* RW */
+		unsigned long	lb_aoerr1:1;			/* RW */
+		unsigned long	kt_aoerr1:1;			/* RW */
+		unsigned long	rh0_aoerr1:1;			/* RW */
+		unsigned long	rh1_aoerr1:1;			/* RW */
+		unsigned long	lh0_aoerr1:1;			/* RW */
+		unsigned long	lh1_aoerr1:1;			/* RW */
+		unsigned long	lh2_aoerr1:1;			/* RW */
+		unsigned long	lh3_aoerr1:1;			/* RW */
+		unsigned long	xb_aoerr1:1;			/* RW */
+		unsigned long	rdm_aoerr1:1;			/* RW */
+		unsigned long	rt0_aoerr1:1;			/* RW */
+		unsigned long	rt1_aoerr1:1;			/* RW */
+		unsigned long	ni0_aoerr1:1;			/* RW */
+		unsigned long	ni1_aoerr1:1;			/* RW */
+		unsigned long	system_shutdown_int:1;		/* RW */
+		unsigned long	lb_irq_int_0:1;			/* RW */
+		unsigned long	lb_irq_int_1:1;			/* RW */
+		unsigned long	lb_irq_int_2:1;			/* RW */
+		unsigned long	lb_irq_int_3:1;			/* RW */
+		unsigned long	lb_irq_int_4:1;			/* RW */
+		unsigned long	lb_irq_int_5:1;			/* RW */
+		unsigned long	lb_irq_int_6:1;			/* RW */
+		unsigned long	lb_irq_int_7:1;			/* RW */
+		unsigned long	lb_irq_int_8:1;			/* RW */
+		unsigned long	lb_irq_int_9:1;			/* RW */
+		unsigned long	lb_irq_int_10:1;		/* RW */
+		unsigned long	lb_irq_int_11:1;		/* RW */
+		unsigned long	lb_irq_int_12:1;		/* RW */
+		unsigned long	lb_irq_int_13:1;		/* RW */
+		unsigned long	lb_irq_int_14:1;		/* RW */
+		unsigned long	lb_irq_int_15:1;		/* RW */
+		unsigned long	l1_nmi_int:1;			/* RW */
+		unsigned long	stop_clock:1;			/* RW */
+		unsigned long	asic_to_l1:1;			/* RW */
+		unsigned long	l1_to_asic:1;			/* RW */
+		unsigned long	la_seq_trigger:1;		/* RW */
+		unsigned long	rsvd_62_63:2;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_event_occurred0_s {
+		unsigned long	lb_hcerr:1;			/* RW */
+		unsigned long	kt_hcerr:1;			/* RW */
+		unsigned long	rh_hcerr:1;			/* RW */
+		unsigned long	lh0_hcerr:1;			/* RW */
+		unsigned long	lh1_hcerr:1;			/* RW */
+		unsigned long	gr0_hcerr:1;			/* RW */
+		unsigned long	gr1_hcerr:1;			/* RW */
+		unsigned long	ni0_hcerr:1;			/* RW */
+		unsigned long	ni1_hcerr:1;			/* RW */
+		unsigned long	lb_aoerr0:1;			/* RW */
+		unsigned long	kt_aoerr0:1;			/* RW */
+		unsigned long	rh_aoerr0:1;			/* RW */
+		unsigned long	lh0_aoerr0:1;			/* RW */
+		unsigned long	lh1_aoerr0:1;			/* RW */
+		unsigned long	gr0_aoerr0:1;			/* RW */
+		unsigned long	gr1_aoerr0:1;			/* RW */
+		unsigned long	xb_aoerr0:1;			/* RW */
+		unsigned long	rtq0_aoerr0:1;			/* RW */
+		unsigned long	rtq1_aoerr0:1;			/* RW */
+		unsigned long	rtq2_aoerr0:1;			/* RW */
+		unsigned long	rtq3_aoerr0:1;			/* RW */
+		unsigned long	ni0_aoerr0:1;			/* RW */
+		unsigned long	ni1_aoerr0:1;			/* RW */
+		unsigned long	lb_aoerr1:1;			/* RW */
+		unsigned long	kt_aoerr1:1;			/* RW */
+		unsigned long	rh_aoerr1:1;			/* RW */
+		unsigned long	lh0_aoerr1:1;			/* RW */
+		unsigned long	lh1_aoerr1:1;			/* RW */
+		unsigned long	gr0_aoerr1:1;			/* RW */
+		unsigned long	gr1_aoerr1:1;			/* RW */
+		unsigned long	xb_aoerr1:1;			/* RW */
+		unsigned long	rtq0_aoerr1:1;			/* RW */
+		unsigned long	rtq1_aoerr1:1;			/* RW */
+		unsigned long	rtq2_aoerr1:1;			/* RW */
+		unsigned long	rtq3_aoerr1:1;			/* RW */
+		unsigned long	ni0_aoerr1:1;			/* RW */
+		unsigned long	ni1_aoerr1:1;			/* RW */
+		unsigned long	system_shutdown_int:1;		/* RW */
+		unsigned long	lb_irq_int_0:1;			/* RW */
+		unsigned long	lb_irq_int_1:1;			/* RW */
+		unsigned long	lb_irq_int_2:1;			/* RW */
+		unsigned long	lb_irq_int_3:1;			/* RW */
+		unsigned long	lb_irq_int_4:1;			/* RW */
+		unsigned long	lb_irq_int_5:1;			/* RW */
+		unsigned long	lb_irq_int_6:1;			/* RW */
+		unsigned long	lb_irq_int_7:1;			/* RW */
 		unsigned long	lb_irq_int_8:1;			/* RW */
 		unsigned long	lb_irq_int_9:1;			/* RW */
 		unsigned long	lb_irq_int_10:1;		/* RW */
@@ -571,538 +778,1650 @@ union uvh_event_occurred0_u {
 		unsigned long	extio_int2:1;			/* RW */
 		unsigned long	extio_int3:1;			/* RW */
 	} s4;
-};
-
-/* ========================================================================= */
-/*                        UVH_EVENT_OCCURRED0_ALIAS                          */
-/* ========================================================================= */
-#define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL
-#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0
-
-
-/* ========================================================================= */
-/*                         UVH_EXTIO_INT0_BROADCAST                          */
-/* ========================================================================= */
-#define UVH_EXTIO_INT0_BROADCAST 0x61448UL
-
-#define UV2H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV3H_EXTIO_INT0_BROADCAST_32 0x3f0
-#define UV4H_EXTIO_INT0_BROADCAST_32 0x310
-#define UVH_EXTIO_INT0_BROADCAST_32 (					\
-	is_uv2_hub() ? UV2H_EXTIO_INT0_BROADCAST_32 :			\
-	is_uv3_hub() ? UV3H_EXTIO_INT0_BROADCAST_32 :			\
-	/*is_uv4_hub*/ UV4H_EXTIO_INT0_BROADCAST_32)
-
-#define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT		0
-#define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK		0x0000000000000001UL
 
+	/* UV3 unique struct */
+	struct uv3h_event_occurred0_s {
+		unsigned long	lb_hcerr:1;			/* RW */
+		unsigned long	qp_hcerr:1;			/* RW */
+		unsigned long	rh_hcerr:1;			/* RW */
+		unsigned long	lh0_hcerr:1;			/* RW */
+		unsigned long	lh1_hcerr:1;			/* RW */
+		unsigned long	gr0_hcerr:1;			/* RW */
+		unsigned long	gr1_hcerr:1;			/* RW */
+		unsigned long	ni0_hcerr:1;			/* RW */
+		unsigned long	ni1_hcerr:1;			/* RW */
+		unsigned long	lb_aoerr0:1;			/* RW */
+		unsigned long	qp_aoerr0:1;			/* RW */
+		unsigned long	rh_aoerr0:1;			/* RW */
+		unsigned long	lh0_aoerr0:1;			/* RW */
+		unsigned long	lh1_aoerr0:1;			/* RW */
+		unsigned long	gr0_aoerr0:1;			/* RW */
+		unsigned long	gr1_aoerr0:1;			/* RW */
+		unsigned long	xb_aoerr0:1;			/* RW */
+		unsigned long	rt_aoerr0:1;			/* RW */
+		unsigned long	ni0_aoerr0:1;			/* RW */
+		unsigned long	ni1_aoerr0:1;			/* RW */
+		unsigned long	lb_aoerr1:1;			/* RW */
+		unsigned long	qp_aoerr1:1;			/* RW */
+		unsigned long	rh_aoerr1:1;			/* RW */
+		unsigned long	lh0_aoerr1:1;			/* RW */
+		unsigned long	lh1_aoerr1:1;			/* RW */
+		unsigned long	gr0_aoerr1:1;			/* RW */
+		unsigned long	gr1_aoerr1:1;			/* RW */
+		unsigned long	xb_aoerr1:1;			/* RW */
+		unsigned long	rt_aoerr1:1;			/* RW */
+		unsigned long	ni0_aoerr1:1;			/* RW */
+		unsigned long	ni1_aoerr1:1;			/* RW */
+		unsigned long	system_shutdown_int:1;		/* RW */
+		unsigned long	lb_irq_int_0:1;			/* RW */
+		unsigned long	lb_irq_int_1:1;			/* RW */
+		unsigned long	lb_irq_int_2:1;			/* RW */
+		unsigned long	lb_irq_int_3:1;			/* RW */
+		unsigned long	lb_irq_int_4:1;			/* RW */
+		unsigned long	lb_irq_int_5:1;			/* RW */
+		unsigned long	lb_irq_int_6:1;			/* RW */
+		unsigned long	lb_irq_int_7:1;			/* RW */
+		unsigned long	lb_irq_int_8:1;			/* RW */
+		unsigned long	lb_irq_int_9:1;			/* RW */
+		unsigned long	lb_irq_int_10:1;		/* RW */
+		unsigned long	lb_irq_int_11:1;		/* RW */
+		unsigned long	lb_irq_int_12:1;		/* RW */
+		unsigned long	lb_irq_int_13:1;		/* RW */
+		unsigned long	lb_irq_int_14:1;		/* RW */
+		unsigned long	lb_irq_int_15:1;		/* RW */
+		unsigned long	l1_nmi_int:1;			/* RW */
+		unsigned long	stop_clock:1;			/* RW */
+		unsigned long	asic_to_l1:1;			/* RW */
+		unsigned long	l1_to_asic:1;			/* RW */
+		unsigned long	la_seq_trigger:1;		/* RW */
+		unsigned long	ipi_int:1;			/* RW */
+		unsigned long	extio_int0:1;			/* RW */
+		unsigned long	extio_int1:1;			/* RW */
+		unsigned long	extio_int2:1;			/* RW */
+		unsigned long	extio_int3:1;			/* RW */
+		unsigned long	profile_int:1;			/* RW */
+		unsigned long	rsvd_59_63:5;
+	} s3;
 
-union uvh_extio_int0_broadcast_u {
-	unsigned long	v;
-	struct uvh_extio_int0_broadcast_s {
-		unsigned long	enable:1;			/* RW */
-		unsigned long	rsvd_1_63:63;
-	} s;
+	/* UV2 unique struct */
+	struct uv2h_event_occurred0_s {
+		unsigned long	lb_hcerr:1;			/* RW */
+		unsigned long	qp_hcerr:1;			/* RW */
+		unsigned long	rh_hcerr:1;			/* RW */
+		unsigned long	lh0_hcerr:1;			/* RW */
+		unsigned long	lh1_hcerr:1;			/* RW */
+		unsigned long	gr0_hcerr:1;			/* RW */
+		unsigned long	gr1_hcerr:1;			/* RW */
+		unsigned long	ni0_hcerr:1;			/* RW */
+		unsigned long	ni1_hcerr:1;			/* RW */
+		unsigned long	lb_aoerr0:1;			/* RW */
+		unsigned long	qp_aoerr0:1;			/* RW */
+		unsigned long	rh_aoerr0:1;			/* RW */
+		unsigned long	lh0_aoerr0:1;			/* RW */
+		unsigned long	lh1_aoerr0:1;			/* RW */
+		unsigned long	gr0_aoerr0:1;			/* RW */
+		unsigned long	gr1_aoerr0:1;			/* RW */
+		unsigned long	xb_aoerr0:1;			/* RW */
+		unsigned long	rt_aoerr0:1;			/* RW */
+		unsigned long	ni0_aoerr0:1;			/* RW */
+		unsigned long	ni1_aoerr0:1;			/* RW */
+		unsigned long	lb_aoerr1:1;			/* RW */
+		unsigned long	qp_aoerr1:1;			/* RW */
+		unsigned long	rh_aoerr1:1;			/* RW */
+		unsigned long	lh0_aoerr1:1;			/* RW */
+		unsigned long	lh1_aoerr1:1;			/* RW */
+		unsigned long	gr0_aoerr1:1;			/* RW */
+		unsigned long	gr1_aoerr1:1;			/* RW */
+		unsigned long	xb_aoerr1:1;			/* RW */
+		unsigned long	rt_aoerr1:1;			/* RW */
+		unsigned long	ni0_aoerr1:1;			/* RW */
+		unsigned long	ni1_aoerr1:1;			/* RW */
+		unsigned long	system_shutdown_int:1;		/* RW */
+		unsigned long	lb_irq_int_0:1;			/* RW */
+		unsigned long	lb_irq_int_1:1;			/* RW */
+		unsigned long	lb_irq_int_2:1;			/* RW */
+		unsigned long	lb_irq_int_3:1;			/* RW */
+		unsigned long	lb_irq_int_4:1;			/* RW */
+		unsigned long	lb_irq_int_5:1;			/* RW */
+		unsigned long	lb_irq_int_6:1;			/* RW */
+		unsigned long	lb_irq_int_7:1;			/* RW */
+		unsigned long	lb_irq_int_8:1;			/* RW */
+		unsigned long	lb_irq_int_9:1;			/* RW */
+		unsigned long	lb_irq_int_10:1;		/* RW */
+		unsigned long	lb_irq_int_11:1;		/* RW */
+		unsigned long	lb_irq_int_12:1;		/* RW */
+		unsigned long	lb_irq_int_13:1;		/* RW */
+		unsigned long	lb_irq_int_14:1;		/* RW */
+		unsigned long	lb_irq_int_15:1;		/* RW */
+		unsigned long	l1_nmi_int:1;			/* RW */
+		unsigned long	stop_clock:1;			/* RW */
+		unsigned long	asic_to_l1:1;			/* RW */
+		unsigned long	l1_to_asic:1;			/* RW */
+		unsigned long	la_seq_trigger:1;		/* RW */
+		unsigned long	ipi_int:1;			/* RW */
+		unsigned long	extio_int0:1;			/* RW */
+		unsigned long	extio_int1:1;			/* RW */
+		unsigned long	extio_int2:1;			/* RW */
+		unsigned long	extio_int3:1;			/* RW */
+		unsigned long	profile_int:1;			/* RW */
+		unsigned long	rsvd_59_63:5;
+	} s2;
 };
 
 /* ========================================================================= */
-/*                         UVH_GR0_TLB_INT0_CONFIG                           */
+/*                        UVH_EVENT_OCCURRED0_ALIAS                          */
 /* ========================================================================= */
-#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL
-
-#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT		0
-#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT			8
-#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT		11
-#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT		12
-#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT			13
-#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT			15
-#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT			16
-#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT		32
-#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK		0x00000000000000ffUL
-#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK			0x0000000000000700UL
-#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK		0x0000000000000800UL
-#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK		0x0000000000001000UL
-#define UVH_GR0_TLB_INT0_CONFIG_P_MASK			0x0000000000002000UL
-#define UVH_GR0_TLB_INT0_CONFIG_T_MASK			0x0000000000008000UL
-#define UVH_GR0_TLB_INT0_CONFIG_M_MASK			0x0000000000010000UL
-#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
-
+#define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL
 
-union uvh_gr0_tlb_int0_config_u {
-	unsigned long	v;
-	struct uvh_gr0_tlb_int0_config_s {
-		unsigned long	vector_:8;			/* RW */
-		unsigned long	dm:3;				/* RW */
-		unsigned long	destmode:1;			/* RW */
-		unsigned long	status:1;			/* RO */
-		unsigned long	p:1;				/* RO */
-		unsigned long	rsvd_14:1;
-		unsigned long	t:1;				/* RO */
-		unsigned long	m:1;				/* RW */
-		unsigned long	rsvd_17_31:15;
-		unsigned long	apic_id:32;			/* RW */
-	} s;
-};
 
 /* ========================================================================= */
-/*                         UVH_GR0_TLB_INT1_CONFIG                           */
+/*                           UVH_EVENT_OCCURRED1                             */
 /* ========================================================================= */
-#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL
-
-#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT		0
-#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT			8
-#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT		11
-#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT		12
-#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT			13
-#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT			15
-#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT			16
-#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT		32
-#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK		0x00000000000000ffUL
-#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK			0x0000000000000700UL
-#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK		0x0000000000000800UL
-#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK		0x0000000000001000UL
-#define UVH_GR0_TLB_INT1_CONFIG_P_MASK			0x0000000000002000UL
-#define UVH_GR0_TLB_INT1_CONFIG_T_MASK			0x0000000000008000UL
-#define UVH_GR0_TLB_INT1_CONFIG_M_MASK			0x0000000000010000UL
-#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
-
-
-union uvh_gr0_tlb_int1_config_u {
+#define UVH_EVENT_OCCURRED1 0x70080UL
+
+
+
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED1_IPI_INT_SHFT		0
+#define UVYH_EVENT_OCCURRED1_IPI_INT_MASK		0x0000000000000001UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_SHFT		1
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT0_MASK		0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_SHFT		2
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT1_MASK		0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_SHFT		3
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT2_MASK		0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_SHFT		4
+#define UVYH_EVENT_OCCURRED1_EXTIO_INT3_MASK		0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED1_PROFILE_INT_SHFT		5
+#define UVYH_EVENT_OCCURRED1_PROFILE_INT_MASK		0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED1_BAU_DATA_SHFT		6
+#define UVYH_EVENT_OCCURRED1_BAU_DATA_MASK		0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_SHFT		7
+#define UVYH_EVENT_OCCURRED1_PROC_GENERAL_MASK		0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_SHFT		8
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT0_MASK		0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_SHFT		9
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT1_MASK		0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_SHFT		10
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT2_MASK		0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_SHFT		11
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT3_MASK		0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_SHFT		12
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT4_MASK		0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_SHFT		13
+#define UVYH_EVENT_OCCURRED1_XH_TLB_INT5_MASK		0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_SHFT		14
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT0_MASK		0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_SHFT		15
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT1_MASK		0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_SHFT		16
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT2_MASK		0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_SHFT		17
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT3_MASK		0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_SHFT		18
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT4_MASK		0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_SHFT		19
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT5_MASK		0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_SHFT		20
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT6_MASK		0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_SHFT		21
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT7_MASK		0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_SHFT		22
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT8_MASK		0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_SHFT		23
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT9_MASK		0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_SHFT		24
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT10_MASK		0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_SHFT		25
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT11_MASK		0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_SHFT		26
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT12_MASK		0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_SHFT		27
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT13_MASK		0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_SHFT		28
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT14_MASK		0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_SHFT		29
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT15_MASK		0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_SHFT		30
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT16_MASK		0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_SHFT		31
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT17_MASK		0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_SHFT		32
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT18_MASK		0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_SHFT		33
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT19_MASK		0x0000000200000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_SHFT		34
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT20_MASK		0x0000000400000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_SHFT		35
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT21_MASK		0x0000000800000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_SHFT		36
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT22_MASK		0x0000001000000000UL
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_SHFT		37
+#define UVYH_EVENT_OCCURRED1_RDM_TLB_INT23_MASK		0x0000002000000000UL
+
+/* UV4 unique defines */
+#define UV4H_EVENT_OCCURRED1_PROFILE_INT_SHFT		0
+#define UV4H_EVENT_OCCURRED1_PROFILE_INT_MASK		0x0000000000000001UL
+#define UV4H_EVENT_OCCURRED1_BAU_DATA_SHFT		1
+#define UV4H_EVENT_OCCURRED1_BAU_DATA_MASK		0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_SHFT		2
+#define UV4H_EVENT_OCCURRED1_PROC_GENERAL_MASK		0x0000000000000004UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT		3
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK		0x0000000000000008UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT		4
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK		0x0000000000000010UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT		5
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK		0x0000000000000020UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT		6
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK		0x0000000000000040UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT		7
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK		0x0000000000000080UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT		8
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK		0x0000000000000100UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT		9
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK		0x0000000000000200UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT		10
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK		0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT		11
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK		0x0000000000000800UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT		12
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK		0x0000000000001000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT		13
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK		0x0000000000002000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT		14
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK		0x0000000000004000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT		15
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK		0x0000000000008000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT		16
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK		0x0000000000010000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT		17
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK		0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT		18
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK		0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_SHFT		19
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT16_MASK		0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_SHFT		20
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT17_MASK		0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_SHFT		21
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT18_MASK		0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_SHFT		22
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT19_MASK		0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_SHFT		23
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT20_MASK		0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_SHFT		24
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT21_MASK		0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_SHFT		25
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT22_MASK		0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_SHFT		26
+#define UV4H_EVENT_OCCURRED1_GR0_TLB_INT23_MASK		0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT		27
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK		0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT		28
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK		0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT		29
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK		0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT		30
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK		0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT		31
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK		0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT		32
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK		0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT		33
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK		0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT		34
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK		0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT		35
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK		0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT		36
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK		0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT		37
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK		0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT		38
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK		0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT		39
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK		0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT		40
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK		0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT		41
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK		0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT		42
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK		0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_SHFT		43
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT16_MASK		0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_SHFT		44
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT17_MASK		0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_SHFT		45
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT18_MASK		0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_SHFT		46
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT19_MASK		0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_SHFT		47
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT20_MASK		0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_SHFT		48
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT21_MASK		0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_SHFT		49
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT22_MASK		0x0002000000000000UL
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_SHFT		50
+#define UV4H_EVENT_OCCURRED1_GR1_TLB_INT23_MASK		0x0004000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED1_BAU_DATA_SHFT		0
+#define UV3H_EVENT_OCCURRED1_BAU_DATA_MASK		0x0000000000000001UL
+#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT	1
+#define UV3H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK	0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17
+#define UV3H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT		18
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK		0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT		19
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK		0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT		20
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK		0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT		21
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK		0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT		22
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK		0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT		23
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK		0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT		24
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK		0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT		25
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK		0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT		26
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK		0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT		27
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK		0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT		28
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK		0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT		29
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK		0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT		30
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK		0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT		31
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK		0x0000000080000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT		32
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK		0x0000000100000000UL
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT		33
+#define UV3H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK		0x0000000200000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT		34
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK		0x0000000400000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT		35
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK		0x0000000800000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT		36
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK		0x0000001000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT		37
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK		0x0000002000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT		38
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK		0x0000004000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT		39
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK		0x0000008000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT		40
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK		0x0000010000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT		41
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK		0x0000020000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT		42
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK		0x0000040000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT		43
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK		0x0000080000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT		44
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK		0x0000100000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT		45
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK		0x0000200000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT		46
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK		0x0000400000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT		47
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK		0x0000800000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT		48
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK		0x0001000000000000UL
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT		49
+#define UV3H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK		0x0002000000000000UL
+#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT	50
+#define UV3H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK	0x0004000000000000UL
+#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT	51
+#define UV3H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK	0x0008000000000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED1_BAU_DATA_SHFT		0
+#define UV2H_EVENT_OCCURRED1_BAU_DATA_MASK		0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_SHFT	1
+#define UV2H_EVENT_OCCURRED1_POWER_MANAGEMENT_REQ_MASK	0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_SHFT 2
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_SHFT 3
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_SHFT 4
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_SHFT 5
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_SHFT 6
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_SHFT 7
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_SHFT 8
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_SHFT 9
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_SHFT 10
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_SHFT 11
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_SHFT 12
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_SHFT 13
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_SHFT 14
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_SHFT 15
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_SHFT 16
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_SHFT 17
+#define UV2H_EVENT_OCCURRED1_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_SHFT		18
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT0_MASK		0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_SHFT		19
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT1_MASK		0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_SHFT		20
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT2_MASK		0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_SHFT		21
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT3_MASK		0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_SHFT		22
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT4_MASK		0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_SHFT		23
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT5_MASK		0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_SHFT		24
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT6_MASK		0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_SHFT		25
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT7_MASK		0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_SHFT		26
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT8_MASK		0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_SHFT		27
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT9_MASK		0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_SHFT		28
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT10_MASK		0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_SHFT		29
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT11_MASK		0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_SHFT		30
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT12_MASK		0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_SHFT		31
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT13_MASK		0x0000000080000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_SHFT		32
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT14_MASK		0x0000000100000000UL
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_SHFT		33
+#define UV2H_EVENT_OCCURRED1_GR0_TLB_INT15_MASK		0x0000000200000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_SHFT		34
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT0_MASK		0x0000000400000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_SHFT		35
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT1_MASK		0x0000000800000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_SHFT		36
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT2_MASK		0x0000001000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_SHFT		37
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT3_MASK		0x0000002000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_SHFT		38
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT4_MASK		0x0000004000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_SHFT		39
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT5_MASK		0x0000008000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_SHFT		40
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT6_MASK		0x0000010000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_SHFT		41
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT7_MASK		0x0000020000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_SHFT		42
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT8_MASK		0x0000040000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_SHFT		43
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT9_MASK		0x0000080000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_SHFT		44
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT10_MASK		0x0000100000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_SHFT		45
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT11_MASK		0x0000200000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_SHFT		46
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT12_MASK		0x0000400000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_SHFT		47
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT13_MASK		0x0000800000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_SHFT		48
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT14_MASK		0x0001000000000000UL
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_SHFT		49
+#define UV2H_EVENT_OCCURRED1_GR1_TLB_INT15_MASK		0x0002000000000000UL
+#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_SHFT	50
+#define UV2H_EVENT_OCCURRED1_RTC_INTERVAL_INT_MASK	0x0004000000000000UL
+#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_SHFT	51
+#define UV2H_EVENT_OCCURRED1_BAU_DASHBOARD_INT_MASK	0x0008000000000000UL
+
+#define UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK (				\
+	is_uv(UV5) ? 0x0000000000000002UL :				\
+	0)
+#define UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT (				\
+	is_uv(UV5) ? 1 :						\
+	-1)
+
+union uvyh_event_occurred1_u {
 	unsigned long	v;
-	struct uvh_gr0_tlb_int1_config_s {
-		unsigned long	vector_:8;			/* RW */
-		unsigned long	dm:3;				/* RW */
-		unsigned long	destmode:1;			/* RW */
-		unsigned long	status:1;			/* RO */
-		unsigned long	p:1;				/* RO */
-		unsigned long	rsvd_14:1;
-		unsigned long	t:1;				/* RO */
-		unsigned long	m:1;				/* RW */
-		unsigned long	rsvd_17_31:15;
-		unsigned long	apic_id:32;			/* RW */
-	} s;
-};
 
-/* ========================================================================= */
-/*                         UVH_GR0_TLB_MMR_CONTROL                           */
-/* ========================================================================= */
-#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL
-#define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL
-#define UV4H_GR0_TLB_MMR_CONTROL 0x601080UL
-#define UVH_GR0_TLB_MMR_CONTROL (					\
-	is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL :			\
-	is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL :			\
-	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL)
-
-#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
-#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
-
-#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
-#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
-#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
-
-#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT		12
-#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT	48
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT	52
-#define UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000000fffUL
-#define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000003000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK	0x0001000000000000UL
-#define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK	0x0010000000000000UL
-
-#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT		12
-#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
-#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT		21
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
-#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000000fffUL
-#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000003000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK		0x0000000000200000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
-#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
-
-#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT		13
-#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
-#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT		21
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
-#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_SHFT		59
-#define UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000001fffUL
-#define UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000006000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK		0x0000000000200000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
-#define UV4H_GR0_TLB_MMR_CONTROL_PAGE_SIZE_MASK		0xf800000000000000UL
-
-#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK (				\
-	is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_INDEX_MASK :		\
-	is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK :		\
-	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_INDEX_MASK)
-#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK (				\
-	is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK :		\
-	is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK :		\
-	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK)
-#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT (				\
-	is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT :		\
-	is_uv3_hub() ? UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT :		\
-	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT)
-
-union uvh_gr0_tlb_mmr_control_u {
-	unsigned long	v;
-	struct uvh_gr0_tlb_mmr_control_s {
-		unsigned long	rsvd_0_15:16;
-		unsigned long	auto_valid_en:1;		/* RW */
-		unsigned long	rsvd_17_19:3;
-		unsigned long	mmr_hash_index_en:1;		/* RW */
-		unsigned long	rsvd_21_29:9;
-		unsigned long	mmr_write:1;			/* WP */
-		unsigned long	mmr_read:1;			/* WP */
-		unsigned long	rsvd_32_48:17;
-		unsigned long	rsvd_49_51:3;
-		unsigned long	rsvd_52_63:12;
-	} s;
-	struct uvxh_gr0_tlb_mmr_control_s {
-		unsigned long	rsvd_0_15:16;
-		unsigned long	auto_valid_en:1;		/* RW */
-		unsigned long	rsvd_17_19:3;
-		unsigned long	mmr_hash_index_en:1;		/* RW */
-		unsigned long	rsvd_21_29:9;
-		unsigned long	mmr_write:1;			/* WP */
-		unsigned long	mmr_read:1;			/* WP */
-		unsigned long	mmr_op_done:1;			/* RW */
-		unsigned long	rsvd_33_47:15;
-		unsigned long	rsvd_48:1;
-		unsigned long	rsvd_49_51:3;
-		unsigned long	rsvd_52_63:12;
-	} sx;
-	struct uv2h_gr0_tlb_mmr_control_s {
-		unsigned long	index:12;			/* RW */
-		unsigned long	mem_sel:2;			/* RW */
-		unsigned long	rsvd_14_15:2;
-		unsigned long	auto_valid_en:1;		/* RW */
-		unsigned long	rsvd_17_19:3;
-		unsigned long	mmr_hash_index_en:1;		/* RW */
-		unsigned long	rsvd_21_29:9;
-		unsigned long	mmr_write:1;			/* WP */
-		unsigned long	mmr_read:1;			/* WP */
-		unsigned long	mmr_op_done:1;			/* RW */
-		unsigned long	rsvd_33_47:15;
-		unsigned long	mmr_inj_con:1;			/* RW */
-		unsigned long	rsvd_49_51:3;
-		unsigned long	mmr_inj_tlbram:1;		/* RW */
-		unsigned long	rsvd_53_63:11;
-	} s2;
-	struct uv3h_gr0_tlb_mmr_control_s {
-		unsigned long	index:12;			/* RW */
-		unsigned long	mem_sel:2;			/* RW */
-		unsigned long	rsvd_14_15:2;
-		unsigned long	auto_valid_en:1;		/* RW */
-		unsigned long	rsvd_17_19:3;
-		unsigned long	mmr_hash_index_en:1;		/* RW */
-		unsigned long	ecc_sel:1;			/* RW */
-		unsigned long	rsvd_22_29:8;
-		unsigned long	mmr_write:1;			/* WP */
-		unsigned long	mmr_read:1;			/* WP */
-		unsigned long	mmr_op_done:1;			/* RW */
-		unsigned long	rsvd_33_47:15;
-		unsigned long	undef_48:1;			/* Undefined */
-		unsigned long	rsvd_49_51:3;
-		unsigned long	undef_52:1;			/* Undefined */
-		unsigned long	rsvd_53_63:11;
-	} s3;
-	struct uv4h_gr0_tlb_mmr_control_s {
-		unsigned long	index:13;			/* RW */
-		unsigned long	mem_sel:2;			/* RW */
-		unsigned long	rsvd_15:1;
-		unsigned long	auto_valid_en:1;		/* RW */
-		unsigned long	rsvd_17_19:3;
-		unsigned long	mmr_hash_index_en:1;		/* RW */
-		unsigned long	ecc_sel:1;			/* RW */
-		unsigned long	rsvd_22_29:8;
-		unsigned long	mmr_write:1;			/* WP */
-		unsigned long	mmr_read:1;			/* WP */
-		unsigned long	mmr_op_done:1;			/* RW */
-		unsigned long	rsvd_33_47:15;
-		unsigned long	undef_48:1;			/* Undefined */
-		unsigned long	rsvd_49_51:3;
-		unsigned long	rsvd_52_58:7;
-		unsigned long	page_size:5;			/* RW */
+	/* UVYH common struct */
+	struct uvyh_event_occurred1_s {
+		unsigned long	ipi_int:1;			/* RW */
+		unsigned long	extio_int0:1;			/* RW */
+		unsigned long	extio_int1:1;			/* RW */
+		unsigned long	extio_int2:1;			/* RW */
+		unsigned long	extio_int3:1;			/* RW */
+		unsigned long	profile_int:1;			/* RW */
+		unsigned long	bau_data:1;			/* RW */
+		unsigned long	proc_general:1;			/* RW */
+		unsigned long	xh_tlb_int0:1;			/* RW */
+		unsigned long	xh_tlb_int1:1;			/* RW */
+		unsigned long	xh_tlb_int2:1;			/* RW */
+		unsigned long	xh_tlb_int3:1;			/* RW */
+		unsigned long	xh_tlb_int4:1;			/* RW */
+		unsigned long	xh_tlb_int5:1;			/* RW */
+		unsigned long	rdm_tlb_int0:1;			/* RW */
+		unsigned long	rdm_tlb_int1:1;			/* RW */
+		unsigned long	rdm_tlb_int2:1;			/* RW */
+		unsigned long	rdm_tlb_int3:1;			/* RW */
+		unsigned long	rdm_tlb_int4:1;			/* RW */
+		unsigned long	rdm_tlb_int5:1;			/* RW */
+		unsigned long	rdm_tlb_int6:1;			/* RW */
+		unsigned long	rdm_tlb_int7:1;			/* RW */
+		unsigned long	rdm_tlb_int8:1;			/* RW */
+		unsigned long	rdm_tlb_int9:1;			/* RW */
+		unsigned long	rdm_tlb_int10:1;		/* RW */
+		unsigned long	rdm_tlb_int11:1;		/* RW */
+		unsigned long	rdm_tlb_int12:1;		/* RW */
+		unsigned long	rdm_tlb_int13:1;		/* RW */
+		unsigned long	rdm_tlb_int14:1;		/* RW */
+		unsigned long	rdm_tlb_int15:1;		/* RW */
+		unsigned long	rdm_tlb_int16:1;		/* RW */
+		unsigned long	rdm_tlb_int17:1;		/* RW */
+		unsigned long	rdm_tlb_int18:1;		/* RW */
+		unsigned long	rdm_tlb_int19:1;		/* RW */
+		unsigned long	rdm_tlb_int20:1;		/* RW */
+		unsigned long	rdm_tlb_int21:1;		/* RW */
+		unsigned long	rdm_tlb_int22:1;		/* RW */
+		unsigned long	rdm_tlb_int23:1;		/* RW */
+		unsigned long	rsvd_38_63:26;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_event_occurred1_s {
+		unsigned long	ipi_int:1;			/* RW */
+		unsigned long	extio_int0:1;			/* RW */
+		unsigned long	extio_int1:1;			/* RW */
+		unsigned long	extio_int2:1;			/* RW */
+		unsigned long	extio_int3:1;			/* RW */
+		unsigned long	profile_int:1;			/* RW */
+		unsigned long	bau_data:1;			/* RW */
+		unsigned long	proc_general:1;			/* RW */
+		unsigned long	xh_tlb_int0:1;			/* RW */
+		unsigned long	xh_tlb_int1:1;			/* RW */
+		unsigned long	xh_tlb_int2:1;			/* RW */
+		unsigned long	xh_tlb_int3:1;			/* RW */
+		unsigned long	xh_tlb_int4:1;			/* RW */
+		unsigned long	xh_tlb_int5:1;			/* RW */
+		unsigned long	rdm_tlb_int0:1;			/* RW */
+		unsigned long	rdm_tlb_int1:1;			/* RW */
+		unsigned long	rdm_tlb_int2:1;			/* RW */
+		unsigned long	rdm_tlb_int3:1;			/* RW */
+		unsigned long	rdm_tlb_int4:1;			/* RW */
+		unsigned long	rdm_tlb_int5:1;			/* RW */
+		unsigned long	rdm_tlb_int6:1;			/* RW */
+		unsigned long	rdm_tlb_int7:1;			/* RW */
+		unsigned long	rdm_tlb_int8:1;			/* RW */
+		unsigned long	rdm_tlb_int9:1;			/* RW */
+		unsigned long	rdm_tlb_int10:1;		/* RW */
+		unsigned long	rdm_tlb_int11:1;		/* RW */
+		unsigned long	rdm_tlb_int12:1;		/* RW */
+		unsigned long	rdm_tlb_int13:1;		/* RW */
+		unsigned long	rdm_tlb_int14:1;		/* RW */
+		unsigned long	rdm_tlb_int15:1;		/* RW */
+		unsigned long	rdm_tlb_int16:1;		/* RW */
+		unsigned long	rdm_tlb_int17:1;		/* RW */
+		unsigned long	rdm_tlb_int18:1;		/* RW */
+		unsigned long	rdm_tlb_int19:1;		/* RW */
+		unsigned long	rdm_tlb_int20:1;		/* RW */
+		unsigned long	rdm_tlb_int21:1;		/* RW */
+		unsigned long	rdm_tlb_int22:1;		/* RW */
+		unsigned long	rdm_tlb_int23:1;		/* RW */
+		unsigned long	rsvd_38_63:26;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_event_occurred1_s {
+		unsigned long	profile_int:1;			/* RW */
+		unsigned long	bau_data:1;			/* RW */
+		unsigned long	proc_general:1;			/* RW */
+		unsigned long	gr0_tlb_int0:1;			/* RW */
+		unsigned long	gr0_tlb_int1:1;			/* RW */
+		unsigned long	gr0_tlb_int2:1;			/* RW */
+		unsigned long	gr0_tlb_int3:1;			/* RW */
+		unsigned long	gr0_tlb_int4:1;			/* RW */
+		unsigned long	gr0_tlb_int5:1;			/* RW */
+		unsigned long	gr0_tlb_int6:1;			/* RW */
+		unsigned long	gr0_tlb_int7:1;			/* RW */
+		unsigned long	gr0_tlb_int8:1;			/* RW */
+		unsigned long	gr0_tlb_int9:1;			/* RW */
+		unsigned long	gr0_tlb_int10:1;		/* RW */
+		unsigned long	gr0_tlb_int11:1;		/* RW */
+		unsigned long	gr0_tlb_int12:1;		/* RW */
+		unsigned long	gr0_tlb_int13:1;		/* RW */
+		unsigned long	gr0_tlb_int14:1;		/* RW */
+		unsigned long	gr0_tlb_int15:1;		/* RW */
+		unsigned long	gr0_tlb_int16:1;		/* RW */
+		unsigned long	gr0_tlb_int17:1;		/* RW */
+		unsigned long	gr0_tlb_int18:1;		/* RW */
+		unsigned long	gr0_tlb_int19:1;		/* RW */
+		unsigned long	gr0_tlb_int20:1;		/* RW */
+		unsigned long	gr0_tlb_int21:1;		/* RW */
+		unsigned long	gr0_tlb_int22:1;		/* RW */
+		unsigned long	gr0_tlb_int23:1;		/* RW */
+		unsigned long	gr1_tlb_int0:1;			/* RW */
+		unsigned long	gr1_tlb_int1:1;			/* RW */
+		unsigned long	gr1_tlb_int2:1;			/* RW */
+		unsigned long	gr1_tlb_int3:1;			/* RW */
+		unsigned long	gr1_tlb_int4:1;			/* RW */
+		unsigned long	gr1_tlb_int5:1;			/* RW */
+		unsigned long	gr1_tlb_int6:1;			/* RW */
+		unsigned long	gr1_tlb_int7:1;			/* RW */
+		unsigned long	gr1_tlb_int8:1;			/* RW */
+		unsigned long	gr1_tlb_int9:1;			/* RW */
+		unsigned long	gr1_tlb_int10:1;		/* RW */
+		unsigned long	gr1_tlb_int11:1;		/* RW */
+		unsigned long	gr1_tlb_int12:1;		/* RW */
+		unsigned long	gr1_tlb_int13:1;		/* RW */
+		unsigned long	gr1_tlb_int14:1;		/* RW */
+		unsigned long	gr1_tlb_int15:1;		/* RW */
+		unsigned long	gr1_tlb_int16:1;		/* RW */
+		unsigned long	gr1_tlb_int17:1;		/* RW */
+		unsigned long	gr1_tlb_int18:1;		/* RW */
+		unsigned long	gr1_tlb_int19:1;		/* RW */
+		unsigned long	gr1_tlb_int20:1;		/* RW */
+		unsigned long	gr1_tlb_int21:1;		/* RW */
+		unsigned long	gr1_tlb_int22:1;		/* RW */
+		unsigned long	gr1_tlb_int23:1;		/* RW */
+		unsigned long	rsvd_51_63:13;
 	} s4;
-};
 
-/* ========================================================================= */
-/*                       UVH_GR0_TLB_MMR_READ_DATA_HI                        */
-/* ========================================================================= */
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI 0x6010a0UL
-#define UVH_GR0_TLB_MMR_READ_DATA_HI (					\
-	is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_HI :			\
-	is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_HI :			\
-	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_HI)
-
-#define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-
-#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT	43
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT	44
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK		0x000001ffffffffffUL
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0000060000000000UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK	0x0000080000000000UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0000100000000000UL
-
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT	43
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT	44
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT	45
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT	55
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK		0x000001ffffffffffUL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0000060000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK	0x0000080000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0000100000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK	0x0000200000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK	0xff80000000000000UL
-
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_SHFT		34
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT		49
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT	51
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT	52
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT	53
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT	55
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK		0x00000003ffffffffUL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_PNID_MASK		0x0001fffc00000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0006000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK	0x0008000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0010000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK	0x0020000000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK	0xff80000000000000UL
-
-
-union uvh_gr0_tlb_mmr_read_data_hi_u {
-	unsigned long	v;
-	struct uv2h_gr0_tlb_mmr_read_data_hi_s {
-		unsigned long	pfn:41;				/* RO */
-		unsigned long	gaa:2;				/* RO */
-		unsigned long	dirty:1;			/* RO */
-		unsigned long	larger:1;			/* RO */
-		unsigned long	rsvd_45_63:19;
-	} s2;
-	struct uv3h_gr0_tlb_mmr_read_data_hi_s {
-		unsigned long	pfn:41;				/* RO */
-		unsigned long	gaa:2;				/* RO */
-		unsigned long	dirty:1;			/* RO */
-		unsigned long	larger:1;			/* RO */
-		unsigned long	aa_ext:1;			/* RO */
-		unsigned long	undef_46_54:9;			/* Undefined */
-		unsigned long	way_ecc:9;			/* RO */
+	/* UV3 unique struct */
+	struct uv3h_event_occurred1_s {
+		unsigned long	bau_data:1;			/* RW */
+		unsigned long	power_management_req:1;		/* RW */
+		unsigned long	message_accelerator_int0:1;	/* RW */
+		unsigned long	message_accelerator_int1:1;	/* RW */
+		unsigned long	message_accelerator_int2:1;	/* RW */
+		unsigned long	message_accelerator_int3:1;	/* RW */
+		unsigned long	message_accelerator_int4:1;	/* RW */
+		unsigned long	message_accelerator_int5:1;	/* RW */
+		unsigned long	message_accelerator_int6:1;	/* RW */
+		unsigned long	message_accelerator_int7:1;	/* RW */
+		unsigned long	message_accelerator_int8:1;	/* RW */
+		unsigned long	message_accelerator_int9:1;	/* RW */
+		unsigned long	message_accelerator_int10:1;	/* RW */
+		unsigned long	message_accelerator_int11:1;	/* RW */
+		unsigned long	message_accelerator_int12:1;	/* RW */
+		unsigned long	message_accelerator_int13:1;	/* RW */
+		unsigned long	message_accelerator_int14:1;	/* RW */
+		unsigned long	message_accelerator_int15:1;	/* RW */
+		unsigned long	gr0_tlb_int0:1;			/* RW */
+		unsigned long	gr0_tlb_int1:1;			/* RW */
+		unsigned long	gr0_tlb_int2:1;			/* RW */
+		unsigned long	gr0_tlb_int3:1;			/* RW */
+		unsigned long	gr0_tlb_int4:1;			/* RW */
+		unsigned long	gr0_tlb_int5:1;			/* RW */
+		unsigned long	gr0_tlb_int6:1;			/* RW */
+		unsigned long	gr0_tlb_int7:1;			/* RW */
+		unsigned long	gr0_tlb_int8:1;			/* RW */
+		unsigned long	gr0_tlb_int9:1;			/* RW */
+		unsigned long	gr0_tlb_int10:1;		/* RW */
+		unsigned long	gr0_tlb_int11:1;		/* RW */
+		unsigned long	gr0_tlb_int12:1;		/* RW */
+		unsigned long	gr0_tlb_int13:1;		/* RW */
+		unsigned long	gr0_tlb_int14:1;		/* RW */
+		unsigned long	gr0_tlb_int15:1;		/* RW */
+		unsigned long	gr1_tlb_int0:1;			/* RW */
+		unsigned long	gr1_tlb_int1:1;			/* RW */
+		unsigned long	gr1_tlb_int2:1;			/* RW */
+		unsigned long	gr1_tlb_int3:1;			/* RW */
+		unsigned long	gr1_tlb_int4:1;			/* RW */
+		unsigned long	gr1_tlb_int5:1;			/* RW */
+		unsigned long	gr1_tlb_int6:1;			/* RW */
+		unsigned long	gr1_tlb_int7:1;			/* RW */
+		unsigned long	gr1_tlb_int8:1;			/* RW */
+		unsigned long	gr1_tlb_int9:1;			/* RW */
+		unsigned long	gr1_tlb_int10:1;		/* RW */
+		unsigned long	gr1_tlb_int11:1;		/* RW */
+		unsigned long	gr1_tlb_int12:1;		/* RW */
+		unsigned long	gr1_tlb_int13:1;		/* RW */
+		unsigned long	gr1_tlb_int14:1;		/* RW */
+		unsigned long	gr1_tlb_int15:1;		/* RW */
+		unsigned long	rtc_interval_int:1;		/* RW */
+		unsigned long	bau_dashboard_int:1;		/* RW */
+		unsigned long	rsvd_52_63:12;
 	} s3;
-	struct uv4h_gr0_tlb_mmr_read_data_hi_s {
-		unsigned long	pfn:34;				/* RO */
-		unsigned long	pnid:15;			/* RO */
-		unsigned long	gaa:2;				/* RO */
-		unsigned long	dirty:1;			/* RO */
-		unsigned long	larger:1;			/* RO */
-		unsigned long	aa_ext:1;			/* RO */
-		unsigned long	undef_54:1;			/* Undefined */
-		unsigned long	way_ecc:9;			/* RO */
-	} s4;
-};
 
-/* ========================================================================= */
-/*                       UVH_GR0_TLB_MMR_READ_DATA_LO                        */
-/* ========================================================================= */
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO 0x6010a8UL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO (					\
-	is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO :			\
-	is_uv3_hub() ? UV3H_GR0_TLB_MMR_READ_DATA_LO :			\
-	/*is_uv4_hub*/ UV4H_GR0_TLB_MMR_READ_DATA_LO)
-
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT		63
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
-#define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK		0x8000000000000000UL
-
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT	63
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
-#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
-
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT	63
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
-#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
-
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT	63
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
-#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
-
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT	63
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
-#define UV4H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
-
-
-union uvh_gr0_tlb_mmr_read_data_lo_u {
-	unsigned long	v;
-	struct uvh_gr0_tlb_mmr_read_data_lo_s {
-		unsigned long	vpn:39;				/* RO */
-		unsigned long	asid:24;			/* RO */
-		unsigned long	valid:1;			/* RO */
-	} s;
-	struct uvxh_gr0_tlb_mmr_read_data_lo_s {
-		unsigned long	vpn:39;				/* RO */
-		unsigned long	asid:24;			/* RO */
-		unsigned long	valid:1;			/* RO */
-	} sx;
-	struct uv2h_gr0_tlb_mmr_read_data_lo_s {
-		unsigned long	vpn:39;				/* RO */
-		unsigned long	asid:24;			/* RO */
-		unsigned long	valid:1;			/* RO */
+	/* UV2 unique struct */
+	struct uv2h_event_occurred1_s {
+		unsigned long	bau_data:1;			/* RW */
+		unsigned long	power_management_req:1;		/* RW */
+		unsigned long	message_accelerator_int0:1;	/* RW */
+		unsigned long	message_accelerator_int1:1;	/* RW */
+		unsigned long	message_accelerator_int2:1;	/* RW */
+		unsigned long	message_accelerator_int3:1;	/* RW */
+		unsigned long	message_accelerator_int4:1;	/* RW */
+		unsigned long	message_accelerator_int5:1;	/* RW */
+		unsigned long	message_accelerator_int6:1;	/* RW */
+		unsigned long	message_accelerator_int7:1;	/* RW */
+		unsigned long	message_accelerator_int8:1;	/* RW */
+		unsigned long	message_accelerator_int9:1;	/* RW */
+		unsigned long	message_accelerator_int10:1;	/* RW */
+		unsigned long	message_accelerator_int11:1;	/* RW */
+		unsigned long	message_accelerator_int12:1;	/* RW */
+		unsigned long	message_accelerator_int13:1;	/* RW */
+		unsigned long	message_accelerator_int14:1;	/* RW */
+		unsigned long	message_accelerator_int15:1;	/* RW */
+		unsigned long	gr0_tlb_int0:1;			/* RW */
+		unsigned long	gr0_tlb_int1:1;			/* RW */
+		unsigned long	gr0_tlb_int2:1;			/* RW */
+		unsigned long	gr0_tlb_int3:1;			/* RW */
+		unsigned long	gr0_tlb_int4:1;			/* RW */
+		unsigned long	gr0_tlb_int5:1;			/* RW */
+		unsigned long	gr0_tlb_int6:1;			/* RW */
+		unsigned long	gr0_tlb_int7:1;			/* RW */
+		unsigned long	gr0_tlb_int8:1;			/* RW */
+		unsigned long	gr0_tlb_int9:1;			/* RW */
+		unsigned long	gr0_tlb_int10:1;		/* RW */
+		unsigned long	gr0_tlb_int11:1;		/* RW */
+		unsigned long	gr0_tlb_int12:1;		/* RW */
+		unsigned long	gr0_tlb_int13:1;		/* RW */
+		unsigned long	gr0_tlb_int14:1;		/* RW */
+		unsigned long	gr0_tlb_int15:1;		/* RW */
+		unsigned long	gr1_tlb_int0:1;			/* RW */
+		unsigned long	gr1_tlb_int1:1;			/* RW */
+		unsigned long	gr1_tlb_int2:1;			/* RW */
+		unsigned long	gr1_tlb_int3:1;			/* RW */
+		unsigned long	gr1_tlb_int4:1;			/* RW */
+		unsigned long	gr1_tlb_int5:1;			/* RW */
+		unsigned long	gr1_tlb_int6:1;			/* RW */
+		unsigned long	gr1_tlb_int7:1;			/* RW */
+		unsigned long	gr1_tlb_int8:1;			/* RW */
+		unsigned long	gr1_tlb_int9:1;			/* RW */
+		unsigned long	gr1_tlb_int10:1;		/* RW */
+		unsigned long	gr1_tlb_int11:1;		/* RW */
+		unsigned long	gr1_tlb_int12:1;		/* RW */
+		unsigned long	gr1_tlb_int13:1;		/* RW */
+		unsigned long	gr1_tlb_int14:1;		/* RW */
+		unsigned long	gr1_tlb_int15:1;		/* RW */
+		unsigned long	rtc_interval_int:1;		/* RW */
+		unsigned long	bau_dashboard_int:1;		/* RW */
+		unsigned long	rsvd_52_63:12;
 	} s2;
-	struct uv3h_gr0_tlb_mmr_read_data_lo_s {
-		unsigned long	vpn:39;				/* RO */
-		unsigned long	asid:24;			/* RO */
-		unsigned long	valid:1;			/* RO */
-	} s3;
-	struct uv4h_gr0_tlb_mmr_read_data_lo_s {
-		unsigned long	vpn:39;				/* RO */
-		unsigned long	asid:24;			/* RO */
-		unsigned long	valid:1;			/* RO */
-	} s4;
 };
 
 /* ========================================================================= */
-/*                         UVH_GR1_TLB_INT0_CONFIG                           */
+/*                        UVH_EVENT_OCCURRED1_ALIAS                          */
 /* ========================================================================= */
-#define UV2H_GR1_TLB_INT0_CONFIG 0x61f00UL
-#define UV3H_GR1_TLB_INT0_CONFIG 0x61f00UL
-#define UV4H_GR1_TLB_INT0_CONFIG 0x62100UL
-#define UVH_GR1_TLB_INT0_CONFIG (					\
-	is_uv2_hub() ? UV2H_GR1_TLB_INT0_CONFIG :			\
-	is_uv3_hub() ? UV3H_GR1_TLB_INT0_CONFIG :			\
-	/*is_uv4_hub*/ UV4H_GR1_TLB_INT0_CONFIG)
-
-#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT		0
-#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT			8
-#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT		11
-#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT		12
-#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT			13
-#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT			15
-#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT			16
-#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT		32
-#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK		0x00000000000000ffUL
-#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK			0x0000000000000700UL
-#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK		0x0000000000000800UL
-#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK		0x0000000000001000UL
-#define UVH_GR1_TLB_INT0_CONFIG_P_MASK			0x0000000000002000UL
-#define UVH_GR1_TLB_INT0_CONFIG_T_MASK			0x0000000000008000UL
-#define UVH_GR1_TLB_INT0_CONFIG_M_MASK			0x0000000000010000UL
-#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
+#define UVH_EVENT_OCCURRED1_ALIAS 0x70088UL
 
 
-union uvh_gr1_tlb_int0_config_u {
-	unsigned long	v;
-	struct uvh_gr1_tlb_int0_config_s {
-		unsigned long	vector_:8;			/* RW */
-		unsigned long	dm:3;				/* RW */
-		unsigned long	destmode:1;			/* RW */
-		unsigned long	status:1;			/* RO */
-		unsigned long	p:1;				/* RO */
-		unsigned long	rsvd_14:1;
-		unsigned long	t:1;				/* RO */
-		unsigned long	m:1;				/* RW */
-		unsigned long	rsvd_17_31:15;
-		unsigned long	apic_id:32;			/* RW */
-	} s;
-};
-
 /* ========================================================================= */
-/*                         UVH_GR1_TLB_INT1_CONFIG                           */
+/*                           UVH_EVENT_OCCURRED2                             */
 /* ========================================================================= */
-#define UV2H_GR1_TLB_INT1_CONFIG 0x61f40UL
-#define UV3H_GR1_TLB_INT1_CONFIG 0x61f40UL
-#define UV4H_GR1_TLB_INT1_CONFIG 0x62140UL
-#define UVH_GR1_TLB_INT1_CONFIG (					\
-	is_uv2_hub() ? UV2H_GR1_TLB_INT1_CONFIG :			\
-	is_uv3_hub() ? UV3H_GR1_TLB_INT1_CONFIG :			\
-	/*is_uv4_hub*/ UV4H_GR1_TLB_INT1_CONFIG)
-
-#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT		0
-#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT			8
-#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT		11
-#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT		12
-#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT			13
-#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT			15
-#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT			16
-#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT		32
-#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK		0x00000000000000ffUL
-#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK			0x0000000000000700UL
-#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK		0x0000000000000800UL
-#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK		0x0000000000001000UL
-#define UVH_GR1_TLB_INT1_CONFIG_P_MASK			0x0000000000002000UL
-#define UVH_GR1_TLB_INT1_CONFIG_T_MASK			0x0000000000008000UL
-#define UVH_GR1_TLB_INT1_CONFIG_M_MASK			0x0000000000010000UL
-#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
+#define UVH_EVENT_OCCURRED2 0x70100UL
+
+
+
+/* UVYH common defines */
+#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT	0
+#define UVYH_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK	0x0000000000000001UL
+#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT	1
+#define UVYH_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK	0x0000000000000002UL
+#define UVYH_EVENT_OCCURRED2_RTC_0_SHFT			2
+#define UVYH_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000000004UL
+#define UVYH_EVENT_OCCURRED2_RTC_1_SHFT			3
+#define UVYH_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000000008UL
+#define UVYH_EVENT_OCCURRED2_RTC_2_SHFT			4
+#define UVYH_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000000010UL
+#define UVYH_EVENT_OCCURRED2_RTC_3_SHFT			5
+#define UVYH_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000000020UL
+#define UVYH_EVENT_OCCURRED2_RTC_4_SHFT			6
+#define UVYH_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000000040UL
+#define UVYH_EVENT_OCCURRED2_RTC_5_SHFT			7
+#define UVYH_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000000080UL
+#define UVYH_EVENT_OCCURRED2_RTC_6_SHFT			8
+#define UVYH_EVENT_OCCURRED2_RTC_6_MASK			0x0000000000000100UL
+#define UVYH_EVENT_OCCURRED2_RTC_7_SHFT			9
+#define UVYH_EVENT_OCCURRED2_RTC_7_MASK			0x0000000000000200UL
+#define UVYH_EVENT_OCCURRED2_RTC_8_SHFT			10
+#define UVYH_EVENT_OCCURRED2_RTC_8_MASK			0x0000000000000400UL
+#define UVYH_EVENT_OCCURRED2_RTC_9_SHFT			11
+#define UVYH_EVENT_OCCURRED2_RTC_9_MASK			0x0000000000000800UL
+#define UVYH_EVENT_OCCURRED2_RTC_10_SHFT		12
+#define UVYH_EVENT_OCCURRED2_RTC_10_MASK		0x0000000000001000UL
+#define UVYH_EVENT_OCCURRED2_RTC_11_SHFT		13
+#define UVYH_EVENT_OCCURRED2_RTC_11_MASK		0x0000000000002000UL
+#define UVYH_EVENT_OCCURRED2_RTC_12_SHFT		14
+#define UVYH_EVENT_OCCURRED2_RTC_12_MASK		0x0000000000004000UL
+#define UVYH_EVENT_OCCURRED2_RTC_13_SHFT		15
+#define UVYH_EVENT_OCCURRED2_RTC_13_MASK		0x0000000000008000UL
+#define UVYH_EVENT_OCCURRED2_RTC_14_SHFT		16
+#define UVYH_EVENT_OCCURRED2_RTC_14_MASK		0x0000000000010000UL
+#define UVYH_EVENT_OCCURRED2_RTC_15_SHFT		17
+#define UVYH_EVENT_OCCURRED2_RTC_15_MASK		0x0000000000020000UL
+#define UVYH_EVENT_OCCURRED2_RTC_16_SHFT		18
+#define UVYH_EVENT_OCCURRED2_RTC_16_MASK		0x0000000000040000UL
+#define UVYH_EVENT_OCCURRED2_RTC_17_SHFT		19
+#define UVYH_EVENT_OCCURRED2_RTC_17_MASK		0x0000000000080000UL
+#define UVYH_EVENT_OCCURRED2_RTC_18_SHFT		20
+#define UVYH_EVENT_OCCURRED2_RTC_18_MASK		0x0000000000100000UL
+#define UVYH_EVENT_OCCURRED2_RTC_19_SHFT		21
+#define UVYH_EVENT_OCCURRED2_RTC_19_MASK		0x0000000000200000UL
+#define UVYH_EVENT_OCCURRED2_RTC_20_SHFT		22
+#define UVYH_EVENT_OCCURRED2_RTC_20_MASK		0x0000000000400000UL
+#define UVYH_EVENT_OCCURRED2_RTC_21_SHFT		23
+#define UVYH_EVENT_OCCURRED2_RTC_21_MASK		0x0000000000800000UL
+#define UVYH_EVENT_OCCURRED2_RTC_22_SHFT		24
+#define UVYH_EVENT_OCCURRED2_RTC_22_MASK		0x0000000001000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_23_SHFT		25
+#define UVYH_EVENT_OCCURRED2_RTC_23_MASK		0x0000000002000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_24_SHFT		26
+#define UVYH_EVENT_OCCURRED2_RTC_24_MASK		0x0000000004000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_25_SHFT		27
+#define UVYH_EVENT_OCCURRED2_RTC_25_MASK		0x0000000008000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_26_SHFT		28
+#define UVYH_EVENT_OCCURRED2_RTC_26_MASK		0x0000000010000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_27_SHFT		29
+#define UVYH_EVENT_OCCURRED2_RTC_27_MASK		0x0000000020000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_28_SHFT		30
+#define UVYH_EVENT_OCCURRED2_RTC_28_MASK		0x0000000040000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_29_SHFT		31
+#define UVYH_EVENT_OCCURRED2_RTC_29_MASK		0x0000000080000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_30_SHFT		32
+#define UVYH_EVENT_OCCURRED2_RTC_30_MASK		0x0000000100000000UL
+#define UVYH_EVENT_OCCURRED2_RTC_31_SHFT		33
+#define UVYH_EVENT_OCCURRED2_RTC_31_MASK		0x0000000200000000UL
+
+/* UV4 unique defines */
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15
+#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT	16
+#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK	0x0000000000010000UL
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT	17
+#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK	0x0000000000020000UL
+#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT			18
+#define UV4H_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000040000UL
+#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT			19
+#define UV4H_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000080000UL
+#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT			20
+#define UV4H_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000100000UL
+#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT			21
+#define UV4H_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000200000UL
+#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT			22
+#define UV4H_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000400000UL
+#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT			23
+#define UV4H_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000800000UL
+#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT			24
+#define UV4H_EVENT_OCCURRED2_RTC_6_MASK			0x0000000001000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT			25
+#define UV4H_EVENT_OCCURRED2_RTC_7_MASK			0x0000000002000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT			26
+#define UV4H_EVENT_OCCURRED2_RTC_8_MASK			0x0000000004000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT			27
+#define UV4H_EVENT_OCCURRED2_RTC_9_MASK			0x0000000008000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT		28
+#define UV4H_EVENT_OCCURRED2_RTC_10_MASK		0x0000000010000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT		29
+#define UV4H_EVENT_OCCURRED2_RTC_11_MASK		0x0000000020000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT		30
+#define UV4H_EVENT_OCCURRED2_RTC_12_MASK		0x0000000040000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT		31
+#define UV4H_EVENT_OCCURRED2_RTC_13_MASK		0x0000000080000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT		32
+#define UV4H_EVENT_OCCURRED2_RTC_14_MASK		0x0000000100000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT		33
+#define UV4H_EVENT_OCCURRED2_RTC_15_MASK		0x0000000200000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT		34
+#define UV4H_EVENT_OCCURRED2_RTC_16_MASK		0x0000000400000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT		35
+#define UV4H_EVENT_OCCURRED2_RTC_17_MASK		0x0000000800000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT		36
+#define UV4H_EVENT_OCCURRED2_RTC_18_MASK		0x0000001000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT		37
+#define UV4H_EVENT_OCCURRED2_RTC_19_MASK		0x0000002000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT		38
+#define UV4H_EVENT_OCCURRED2_RTC_20_MASK		0x0000004000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT		39
+#define UV4H_EVENT_OCCURRED2_RTC_21_MASK		0x0000008000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT		40
+#define UV4H_EVENT_OCCURRED2_RTC_22_MASK		0x0000010000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT		41
+#define UV4H_EVENT_OCCURRED2_RTC_23_MASK		0x0000020000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT		42
+#define UV4H_EVENT_OCCURRED2_RTC_24_MASK		0x0000040000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT		43
+#define UV4H_EVENT_OCCURRED2_RTC_25_MASK		0x0000080000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT		44
+#define UV4H_EVENT_OCCURRED2_RTC_26_MASK		0x0000100000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT		45
+#define UV4H_EVENT_OCCURRED2_RTC_27_MASK		0x0000200000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT		46
+#define UV4H_EVENT_OCCURRED2_RTC_28_MASK		0x0000400000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT		47
+#define UV4H_EVENT_OCCURRED2_RTC_29_MASK		0x0000800000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT		48
+#define UV4H_EVENT_OCCURRED2_RTC_30_MASK		0x0001000000000000UL
+#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT		49
+#define UV4H_EVENT_OCCURRED2_RTC_31_MASK		0x0002000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT			0
+#define UV3H_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000000001UL
+#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT			1
+#define UV3H_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000000002UL
+#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT			2
+#define UV3H_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000000004UL
+#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT			3
+#define UV3H_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000000008UL
+#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT			4
+#define UV3H_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000000010UL
+#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT			5
+#define UV3H_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000000020UL
+#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT			6
+#define UV3H_EVENT_OCCURRED2_RTC_6_MASK			0x0000000000000040UL
+#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT			7
+#define UV3H_EVENT_OCCURRED2_RTC_7_MASK			0x0000000000000080UL
+#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT			8
+#define UV3H_EVENT_OCCURRED2_RTC_8_MASK			0x0000000000000100UL
+#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT			9
+#define UV3H_EVENT_OCCURRED2_RTC_9_MASK			0x0000000000000200UL
+#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT		10
+#define UV3H_EVENT_OCCURRED2_RTC_10_MASK		0x0000000000000400UL
+#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT		11
+#define UV3H_EVENT_OCCURRED2_RTC_11_MASK		0x0000000000000800UL
+#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT		12
+#define UV3H_EVENT_OCCURRED2_RTC_12_MASK		0x0000000000001000UL
+#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT		13
+#define UV3H_EVENT_OCCURRED2_RTC_13_MASK		0x0000000000002000UL
+#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT		14
+#define UV3H_EVENT_OCCURRED2_RTC_14_MASK		0x0000000000004000UL
+#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT		15
+#define UV3H_EVENT_OCCURRED2_RTC_15_MASK		0x0000000000008000UL
+#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT		16
+#define UV3H_EVENT_OCCURRED2_RTC_16_MASK		0x0000000000010000UL
+#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT		17
+#define UV3H_EVENT_OCCURRED2_RTC_17_MASK		0x0000000000020000UL
+#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT		18
+#define UV3H_EVENT_OCCURRED2_RTC_18_MASK		0x0000000000040000UL
+#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT		19
+#define UV3H_EVENT_OCCURRED2_RTC_19_MASK		0x0000000000080000UL
+#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT		20
+#define UV3H_EVENT_OCCURRED2_RTC_20_MASK		0x0000000000100000UL
+#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT		21
+#define UV3H_EVENT_OCCURRED2_RTC_21_MASK		0x0000000000200000UL
+#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT		22
+#define UV3H_EVENT_OCCURRED2_RTC_22_MASK		0x0000000000400000UL
+#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT		23
+#define UV3H_EVENT_OCCURRED2_RTC_23_MASK		0x0000000000800000UL
+#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT		24
+#define UV3H_EVENT_OCCURRED2_RTC_24_MASK		0x0000000001000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT		25
+#define UV3H_EVENT_OCCURRED2_RTC_25_MASK		0x0000000002000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT		26
+#define UV3H_EVENT_OCCURRED2_RTC_26_MASK		0x0000000004000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT		27
+#define UV3H_EVENT_OCCURRED2_RTC_27_MASK		0x0000000008000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT		28
+#define UV3H_EVENT_OCCURRED2_RTC_28_MASK		0x0000000010000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT		29
+#define UV3H_EVENT_OCCURRED2_RTC_29_MASK		0x0000000020000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT		30
+#define UV3H_EVENT_OCCURRED2_RTC_30_MASK		0x0000000040000000UL
+#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT		31
+#define UV3H_EVENT_OCCURRED2_RTC_31_MASK		0x0000000080000000UL
+
+/* UV2 unique defines */
+#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT			0
+#define UV2H_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000000001UL
+#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT			1
+#define UV2H_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000000002UL
+#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT			2
+#define UV2H_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000000004UL
+#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT			3
+#define UV2H_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000000008UL
+#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT			4
+#define UV2H_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000000010UL
+#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT			5
+#define UV2H_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000000020UL
+#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT			6
+#define UV2H_EVENT_OCCURRED2_RTC_6_MASK			0x0000000000000040UL
+#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT			7
+#define UV2H_EVENT_OCCURRED2_RTC_7_MASK			0x0000000000000080UL
+#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT			8
+#define UV2H_EVENT_OCCURRED2_RTC_8_MASK			0x0000000000000100UL
+#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT			9
+#define UV2H_EVENT_OCCURRED2_RTC_9_MASK			0x0000000000000200UL
+#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT		10
+#define UV2H_EVENT_OCCURRED2_RTC_10_MASK		0x0000000000000400UL
+#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT		11
+#define UV2H_EVENT_OCCURRED2_RTC_11_MASK		0x0000000000000800UL
+#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT		12
+#define UV2H_EVENT_OCCURRED2_RTC_12_MASK		0x0000000000001000UL
+#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT		13
+#define UV2H_EVENT_OCCURRED2_RTC_13_MASK		0x0000000000002000UL
+#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT		14
+#define UV2H_EVENT_OCCURRED2_RTC_14_MASK		0x0000000000004000UL
+#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT		15
+#define UV2H_EVENT_OCCURRED2_RTC_15_MASK		0x0000000000008000UL
+#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT		16
+#define UV2H_EVENT_OCCURRED2_RTC_16_MASK		0x0000000000010000UL
+#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT		17
+#define UV2H_EVENT_OCCURRED2_RTC_17_MASK		0x0000000000020000UL
+#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT		18
+#define UV2H_EVENT_OCCURRED2_RTC_18_MASK		0x0000000000040000UL
+#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT		19
+#define UV2H_EVENT_OCCURRED2_RTC_19_MASK		0x0000000000080000UL
+#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT		20
+#define UV2H_EVENT_OCCURRED2_RTC_20_MASK		0x0000000000100000UL
+#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT		21
+#define UV2H_EVENT_OCCURRED2_RTC_21_MASK		0x0000000000200000UL
+#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT		22
+#define UV2H_EVENT_OCCURRED2_RTC_22_MASK		0x0000000000400000UL
+#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT		23
+#define UV2H_EVENT_OCCURRED2_RTC_23_MASK		0x0000000000800000UL
+#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT		24
+#define UV2H_EVENT_OCCURRED2_RTC_24_MASK		0x0000000001000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT		25
+#define UV2H_EVENT_OCCURRED2_RTC_25_MASK		0x0000000002000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT		26
+#define UV2H_EVENT_OCCURRED2_RTC_26_MASK		0x0000000004000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT		27
+#define UV2H_EVENT_OCCURRED2_RTC_27_MASK		0x0000000008000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT		28
+#define UV2H_EVENT_OCCURRED2_RTC_28_MASK		0x0000000010000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT		29
+#define UV2H_EVENT_OCCURRED2_RTC_29_MASK		0x0000000020000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT		30
+#define UV2H_EVENT_OCCURRED2_RTC_30_MASK		0x0000000040000000UL
+#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT		31
+#define UV2H_EVENT_OCCURRED2_RTC_31_MASK		0x0000000080000000UL
+
+#define UVH_EVENT_OCCURRED2_RTC_1_MASK (				\
+	is_uv(UV5) ? 0x0000000000000008UL :				\
+	is_uv(UV4) ? 0x0000000000080000UL :				\
+	is_uv(UV3) ? 0x0000000000000002UL :				\
+	is_uv(UV2) ? 0x0000000000000002UL :				\
+	0)
+#define UVH_EVENT_OCCURRED2_RTC_1_SHFT (				\
+	is_uv(UV5) ? 3 :						\
+	is_uv(UV4) ? 19 :						\
+	is_uv(UV3) ? 1 :						\
+	is_uv(UV2) ? 1 :						\
+	-1)
+
+union uvyh_event_occurred2_u {
+	unsigned long	v;
+
+	/* UVYH common struct */
+	struct uvyh_event_occurred2_s {
+		unsigned long	rtc_interval_int:1;		/* RW */
+		unsigned long	bau_dashboard_int:1;		/* RW */
+		unsigned long	rtc_0:1;			/* RW */
+		unsigned long	rtc_1:1;			/* RW */
+		unsigned long	rtc_2:1;			/* RW */
+		unsigned long	rtc_3:1;			/* RW */
+		unsigned long	rtc_4:1;			/* RW */
+		unsigned long	rtc_5:1;			/* RW */
+		unsigned long	rtc_6:1;			/* RW */
+		unsigned long	rtc_7:1;			/* RW */
+		unsigned long	rtc_8:1;			/* RW */
+		unsigned long	rtc_9:1;			/* RW */
+		unsigned long	rtc_10:1;			/* RW */
+		unsigned long	rtc_11:1;			/* RW */
+		unsigned long	rtc_12:1;			/* RW */
+		unsigned long	rtc_13:1;			/* RW */
+		unsigned long	rtc_14:1;			/* RW */
+		unsigned long	rtc_15:1;			/* RW */
+		unsigned long	rtc_16:1;			/* RW */
+		unsigned long	rtc_17:1;			/* RW */
+		unsigned long	rtc_18:1;			/* RW */
+		unsigned long	rtc_19:1;			/* RW */
+		unsigned long	rtc_20:1;			/* RW */
+		unsigned long	rtc_21:1;			/* RW */
+		unsigned long	rtc_22:1;			/* RW */
+		unsigned long	rtc_23:1;			/* RW */
+		unsigned long	rtc_24:1;			/* RW */
+		unsigned long	rtc_25:1;			/* RW */
+		unsigned long	rtc_26:1;			/* RW */
+		unsigned long	rtc_27:1;			/* RW */
+		unsigned long	rtc_28:1;			/* RW */
+		unsigned long	rtc_29:1;			/* RW */
+		unsigned long	rtc_30:1;			/* RW */
+		unsigned long	rtc_31:1;			/* RW */
+		unsigned long	rsvd_34_63:30;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_event_occurred2_s {
+		unsigned long	rtc_interval_int:1;		/* RW */
+		unsigned long	bau_dashboard_int:1;		/* RW */
+		unsigned long	rtc_0:1;			/* RW */
+		unsigned long	rtc_1:1;			/* RW */
+		unsigned long	rtc_2:1;			/* RW */
+		unsigned long	rtc_3:1;			/* RW */
+		unsigned long	rtc_4:1;			/* RW */
+		unsigned long	rtc_5:1;			/* RW */
+		unsigned long	rtc_6:1;			/* RW */
+		unsigned long	rtc_7:1;			/* RW */
+		unsigned long	rtc_8:1;			/* RW */
+		unsigned long	rtc_9:1;			/* RW */
+		unsigned long	rtc_10:1;			/* RW */
+		unsigned long	rtc_11:1;			/* RW */
+		unsigned long	rtc_12:1;			/* RW */
+		unsigned long	rtc_13:1;			/* RW */
+		unsigned long	rtc_14:1;			/* RW */
+		unsigned long	rtc_15:1;			/* RW */
+		unsigned long	rtc_16:1;			/* RW */
+		unsigned long	rtc_17:1;			/* RW */
+		unsigned long	rtc_18:1;			/* RW */
+		unsigned long	rtc_19:1;			/* RW */
+		unsigned long	rtc_20:1;			/* RW */
+		unsigned long	rtc_21:1;			/* RW */
+		unsigned long	rtc_22:1;			/* RW */
+		unsigned long	rtc_23:1;			/* RW */
+		unsigned long	rtc_24:1;			/* RW */
+		unsigned long	rtc_25:1;			/* RW */
+		unsigned long	rtc_26:1;			/* RW */
+		unsigned long	rtc_27:1;			/* RW */
+		unsigned long	rtc_28:1;			/* RW */
+		unsigned long	rtc_29:1;			/* RW */
+		unsigned long	rtc_30:1;			/* RW */
+		unsigned long	rtc_31:1;			/* RW */
+		unsigned long	rsvd_34_63:30;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_event_occurred2_s {
+		unsigned long	message_accelerator_int0:1;	/* RW */
+		unsigned long	message_accelerator_int1:1;	/* RW */
+		unsigned long	message_accelerator_int2:1;	/* RW */
+		unsigned long	message_accelerator_int3:1;	/* RW */
+		unsigned long	message_accelerator_int4:1;	/* RW */
+		unsigned long	message_accelerator_int5:1;	/* RW */
+		unsigned long	message_accelerator_int6:1;	/* RW */
+		unsigned long	message_accelerator_int7:1;	/* RW */
+		unsigned long	message_accelerator_int8:1;	/* RW */
+		unsigned long	message_accelerator_int9:1;	/* RW */
+		unsigned long	message_accelerator_int10:1;	/* RW */
+		unsigned long	message_accelerator_int11:1;	/* RW */
+		unsigned long	message_accelerator_int12:1;	/* RW */
+		unsigned long	message_accelerator_int13:1;	/* RW */
+		unsigned long	message_accelerator_int14:1;	/* RW */
+		unsigned long	message_accelerator_int15:1;	/* RW */
+		unsigned long	rtc_interval_int:1;		/* RW */
+		unsigned long	bau_dashboard_int:1;		/* RW */
+		unsigned long	rtc_0:1;			/* RW */
+		unsigned long	rtc_1:1;			/* RW */
+		unsigned long	rtc_2:1;			/* RW */
+		unsigned long	rtc_3:1;			/* RW */
+		unsigned long	rtc_4:1;			/* RW */
+		unsigned long	rtc_5:1;			/* RW */
+		unsigned long	rtc_6:1;			/* RW */
+		unsigned long	rtc_7:1;			/* RW */
+		unsigned long	rtc_8:1;			/* RW */
+		unsigned long	rtc_9:1;			/* RW */
+		unsigned long	rtc_10:1;			/* RW */
+		unsigned long	rtc_11:1;			/* RW */
+		unsigned long	rtc_12:1;			/* RW */
+		unsigned long	rtc_13:1;			/* RW */
+		unsigned long	rtc_14:1;			/* RW */
+		unsigned long	rtc_15:1;			/* RW */
+		unsigned long	rtc_16:1;			/* RW */
+		unsigned long	rtc_17:1;			/* RW */
+		unsigned long	rtc_18:1;			/* RW */
+		unsigned long	rtc_19:1;			/* RW */
+		unsigned long	rtc_20:1;			/* RW */
+		unsigned long	rtc_21:1;			/* RW */
+		unsigned long	rtc_22:1;			/* RW */
+		unsigned long	rtc_23:1;			/* RW */
+		unsigned long	rtc_24:1;			/* RW */
+		unsigned long	rtc_25:1;			/* RW */
+		unsigned long	rtc_26:1;			/* RW */
+		unsigned long	rtc_27:1;			/* RW */
+		unsigned long	rtc_28:1;			/* RW */
+		unsigned long	rtc_29:1;			/* RW */
+		unsigned long	rtc_30:1;			/* RW */
+		unsigned long	rtc_31:1;			/* RW */
+		unsigned long	rsvd_50_63:14;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_event_occurred2_s {
+		unsigned long	rtc_0:1;			/* RW */
+		unsigned long	rtc_1:1;			/* RW */
+		unsigned long	rtc_2:1;			/* RW */
+		unsigned long	rtc_3:1;			/* RW */
+		unsigned long	rtc_4:1;			/* RW */
+		unsigned long	rtc_5:1;			/* RW */
+		unsigned long	rtc_6:1;			/* RW */
+		unsigned long	rtc_7:1;			/* RW */
+		unsigned long	rtc_8:1;			/* RW */
+		unsigned long	rtc_9:1;			/* RW */
+		unsigned long	rtc_10:1;			/* RW */
+		unsigned long	rtc_11:1;			/* RW */
+		unsigned long	rtc_12:1;			/* RW */
+		unsigned long	rtc_13:1;			/* RW */
+		unsigned long	rtc_14:1;			/* RW */
+		unsigned long	rtc_15:1;			/* RW */
+		unsigned long	rtc_16:1;			/* RW */
+		unsigned long	rtc_17:1;			/* RW */
+		unsigned long	rtc_18:1;			/* RW */
+		unsigned long	rtc_19:1;			/* RW */
+		unsigned long	rtc_20:1;			/* RW */
+		unsigned long	rtc_21:1;			/* RW */
+		unsigned long	rtc_22:1;			/* RW */
+		unsigned long	rtc_23:1;			/* RW */
+		unsigned long	rtc_24:1;			/* RW */
+		unsigned long	rtc_25:1;			/* RW */
+		unsigned long	rtc_26:1;			/* RW */
+		unsigned long	rtc_27:1;			/* RW */
+		unsigned long	rtc_28:1;			/* RW */
+		unsigned long	rtc_29:1;			/* RW */
+		unsigned long	rtc_30:1;			/* RW */
+		unsigned long	rtc_31:1;			/* RW */
+		unsigned long	rsvd_32_63:32;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_event_occurred2_s {
+		unsigned long	rtc_0:1;			/* RW */
+		unsigned long	rtc_1:1;			/* RW */
+		unsigned long	rtc_2:1;			/* RW */
+		unsigned long	rtc_3:1;			/* RW */
+		unsigned long	rtc_4:1;			/* RW */
+		unsigned long	rtc_5:1;			/* RW */
+		unsigned long	rtc_6:1;			/* RW */
+		unsigned long	rtc_7:1;			/* RW */
+		unsigned long	rtc_8:1;			/* RW */
+		unsigned long	rtc_9:1;			/* RW */
+		unsigned long	rtc_10:1;			/* RW */
+		unsigned long	rtc_11:1;			/* RW */
+		unsigned long	rtc_12:1;			/* RW */
+		unsigned long	rtc_13:1;			/* RW */
+		unsigned long	rtc_14:1;			/* RW */
+		unsigned long	rtc_15:1;			/* RW */
+		unsigned long	rtc_16:1;			/* RW */
+		unsigned long	rtc_17:1;			/* RW */
+		unsigned long	rtc_18:1;			/* RW */
+		unsigned long	rtc_19:1;			/* RW */
+		unsigned long	rtc_20:1;			/* RW */
+		unsigned long	rtc_21:1;			/* RW */
+		unsigned long	rtc_22:1;			/* RW */
+		unsigned long	rtc_23:1;			/* RW */
+		unsigned long	rtc_24:1;			/* RW */
+		unsigned long	rtc_25:1;			/* RW */
+		unsigned long	rtc_26:1;			/* RW */
+		unsigned long	rtc_27:1;			/* RW */
+		unsigned long	rtc_28:1;			/* RW */
+		unsigned long	rtc_29:1;			/* RW */
+		unsigned long	rtc_30:1;			/* RW */
+		unsigned long	rtc_31:1;			/* RW */
+		unsigned long	rsvd_32_63:32;
+	} s2;
+};
+
+/* ========================================================================= */
+/*                        UVH_EVENT_OCCURRED2_ALIAS                          */
+/* ========================================================================= */
+#define UVH_EVENT_OCCURRED2_ALIAS 0x70108UL
+
+
+/* ========================================================================= */
+/*                         UVH_EXTIO_INT0_BROADCAST                          */
+/* ========================================================================= */
+#define UVH_EXTIO_INT0_BROADCAST 0x61448UL
+
+/* UVH common defines*/
+#define UVH_EXTIO_INT0_BROADCAST_ENABLE_SHFT		0
+#define UVH_EXTIO_INT0_BROADCAST_ENABLE_MASK		0x0000000000000001UL
+
+
+union uvh_extio_int0_broadcast_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_extio_int0_broadcast_s {
+		unsigned long	enable:1;			/* RW */
+		unsigned long	rsvd_1_63:63;
+	} s;
+
+	/* UV5 unique struct */
+	struct uv5h_extio_int0_broadcast_s {
+		unsigned long	enable:1;			/* RW */
+		unsigned long	rsvd_1_63:63;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_extio_int0_broadcast_s {
+		unsigned long	enable:1;			/* RW */
+		unsigned long	rsvd_1_63:63;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_extio_int0_broadcast_s {
+		unsigned long	enable:1;			/* RW */
+		unsigned long	rsvd_1_63:63;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_extio_int0_broadcast_s {
+		unsigned long	enable:1;			/* RW */
+		unsigned long	rsvd_1_63:63;
+	} s2;
+};
+
+/* ========================================================================= */
+/*                          UVH_GR0_GAM_GR_CONFIG                            */
+/* ========================================================================= */
+#define UVH_GR0_GAM_GR_CONFIG (						\
+	is_uv(UV5) ? 0x600028UL :					\
+	is_uv(UV4) ? 0x600028UL :					\
+	is_uv(UV3) ? 0xc00028UL :					\
+	is_uv(UV2) ? 0xc00028UL :					\
+	0)
+
+
+
+/* UVYH common defines */
+#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT		10
+#define UVYH_GR0_GAM_GR_CONFIG_SUBSPACE_MASK		0x0000000000000400UL
+
+/* UV4 unique defines */
+#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT		10
+#define UV4H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK		0x0000000000000400UL
+
+/* UV3 unique defines */
+#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT		0
+#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK		0x000000000000003fUL
+#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT		10
+#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK		0x0000000000000400UL
+
+/* UV2 unique defines */
+#define UV2H_GR0_GAM_GR_CONFIG_N_GR_SHFT		0
+#define UV2H_GR0_GAM_GR_CONFIG_N_GR_MASK		0x000000000000000fUL
+
+
+union uvyh_gr0_gam_gr_config_u {
+	unsigned long	v;
 
+	/* UVYH common struct */
+	struct uvyh_gr0_gam_gr_config_s {
+		unsigned long	rsvd_0_9:10;
+		unsigned long	subspace:1;			/* RW */
+		unsigned long	rsvd_11_63:53;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_gr0_gam_gr_config_s {
+		unsigned long	rsvd_0_9:10;
+		unsigned long	subspace:1;			/* RW */
+		unsigned long	rsvd_11_63:53;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_gr0_gam_gr_config_s {
+		unsigned long	rsvd_0_9:10;
+		unsigned long	subspace:1;			/* RW */
+		unsigned long	rsvd_11_63:53;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_gr0_gam_gr_config_s {
+		unsigned long	m_skt:6;			/* RW */
+		unsigned long	undef_6_9:4;			/* Undefined */
+		unsigned long	subspace:1;			/* RW */
+		unsigned long	reserved:53;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_gr0_gam_gr_config_s {
+		unsigned long	n_gr:4;				/* RW */
+		unsigned long	reserved:60;
+	} s2;
+};
+
+/* ========================================================================= */
+/*                         UVH_GR0_TLB_INT0_CONFIG                           */
+/* ========================================================================= */
+#define UVH_GR0_TLB_INT0_CONFIG (					\
+	is_uv(UV4) ? 0x61b00UL :					\
+	is_uv(UV3) ? 0x61b00UL :					\
+	is_uv(UV2) ? 0x61b00UL :					\
+	uv_undefined("UVH_GR0_TLB_INT0_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT		0
+#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK		0x00000000000000ffUL
+#define UVXH_GR0_TLB_INT0_CONFIG_DM_SHFT		8
+#define UVXH_GR0_TLB_INT0_CONFIG_DM_MASK		0x0000000000000700UL
+#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT		11
+#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK		0x0000000000000800UL
+#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_SHFT		12
+#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_MASK		0x0000000000001000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_P_SHFT			13
+#define UVXH_GR0_TLB_INT0_CONFIG_P_MASK			0x0000000000002000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_T_SHFT			15
+#define UVXH_GR0_TLB_INT0_CONFIG_T_MASK			0x0000000000008000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_M_SHFT			16
+#define UVXH_GR0_TLB_INT0_CONFIG_M_MASK			0x0000000000010000UL
+#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT		32
+#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
+
+
+union uvh_gr0_tlb_int0_config_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_gr0_tlb_int0_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s;
+
+	/* UVXH common struct */
+	struct uvxh_gr0_tlb_int0_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} sx;
+
+	/* UV4 unique struct */
+	struct uv4h_gr0_tlb_int0_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_gr0_tlb_int0_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_gr0_tlb_int0_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s2;
+};
+
+/* ========================================================================= */
+/*                         UVH_GR0_TLB_INT1_CONFIG                           */
+/* ========================================================================= */
+#define UVH_GR0_TLB_INT1_CONFIG (					\
+	is_uv(UV4) ? 0x61b40UL :					\
+	is_uv(UV3) ? 0x61b40UL :					\
+	is_uv(UV2) ? 0x61b40UL :					\
+	uv_undefined("UVH_GR0_TLB_INT1_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT		0
+#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_MASK		0x00000000000000ffUL
+#define UVXH_GR0_TLB_INT1_CONFIG_DM_SHFT		8
+#define UVXH_GR0_TLB_INT1_CONFIG_DM_MASK		0x0000000000000700UL
+#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT		11
+#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK		0x0000000000000800UL
+#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_SHFT		12
+#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_MASK		0x0000000000001000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_P_SHFT			13
+#define UVXH_GR0_TLB_INT1_CONFIG_P_MASK			0x0000000000002000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_T_SHFT			15
+#define UVXH_GR0_TLB_INT1_CONFIG_T_MASK			0x0000000000008000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_M_SHFT			16
+#define UVXH_GR0_TLB_INT1_CONFIG_M_MASK			0x0000000000010000UL
+#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT		32
+#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
+
+
+union uvh_gr0_tlb_int1_config_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_gr0_tlb_int1_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s;
+
+	/* UVXH common struct */
+	struct uvxh_gr0_tlb_int1_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} sx;
+
+	/* UV4 unique struct */
+	struct uv4h_gr0_tlb_int1_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s4;
 
-union uvh_gr1_tlb_int1_config_u {
-	unsigned long	v;
-	struct uvh_gr1_tlb_int1_config_s {
+	/* UV3 unique struct */
+	struct uv3h_gr0_tlb_int1_config_s {
 		unsigned long	vector_:8;			/* RW */
 		unsigned long	dm:3;				/* RW */
 		unsigned long	destmode:1;			/* RW */
@@ -1113,1326 +2432,403 @@ union uvh_gr1_tlb_int1_config_u {
 		unsigned long	m:1;				/* RW */
 		unsigned long	rsvd_17_31:15;
 		unsigned long	apic_id:32;			/* RW */
-	} s;
-};
-
-/* ========================================================================= */
-/*                         UVH_GR1_TLB_MMR_CONTROL                           */
-/* ========================================================================= */
-#define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL
-#define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL
-#define UV4H_GR1_TLB_MMR_CONTROL 0x701080UL
-#define UVH_GR1_TLB_MMR_CONTROL (					\
-	is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL :			\
-	is_uv3_hub() ? UV3H_GR1_TLB_MMR_CONTROL :			\
-	/*is_uv4_hub*/ UV4H_GR1_TLB_MMR_CONTROL)
-
-#define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UVH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
-#define UVH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
-
-#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
-#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
-#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
-
-#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT		12
-#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_SHFT	48
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_SHFT	52
-#define UV2H_GR1_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000000fffUL
-#define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000003000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK	0x0001000000000000UL
-#define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK	0x0010000000000000UL
-
-#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT		12
-#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
-#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT		21
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
-#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000000fffUL
-#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000003000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK		0x0000000000200000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
-#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
-
-#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_SHFT		0
-#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT		13
-#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT	16
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT	20
-#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT		21
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT		30
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT		31
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT	32
-#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_SHFT		59
-#define UV4H_GR1_TLB_MMR_CONTROL_INDEX_MASK		0x0000000000001fffUL
-#define UV4H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK		0x0000000000006000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK	0x0000000000010000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK	0x0000000000100000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK		0x0000000000200000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK		0x0000000040000000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK		0x0000000080000000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK	0x0000000100000000UL
-#define UV4H_GR1_TLB_MMR_CONTROL_PAGE_SIZE_MASK		0xf800000000000000UL
-
-
-union uvh_gr1_tlb_mmr_control_u {
-	unsigned long	v;
-	struct uvh_gr1_tlb_mmr_control_s {
-		unsigned long	rsvd_0_15:16;
-		unsigned long	auto_valid_en:1;		/* RW */
-		unsigned long	rsvd_17_19:3;
-		unsigned long	mmr_hash_index_en:1;		/* RW */
-		unsigned long	rsvd_21_29:9;
-		unsigned long	mmr_write:1;			/* WP */
-		unsigned long	mmr_read:1;			/* WP */
-		unsigned long	rsvd_32_48:17;
-		unsigned long	rsvd_49_51:3;
-		unsigned long	rsvd_52_63:12;
-	} s;
-	struct uvxh_gr1_tlb_mmr_control_s {
-		unsigned long	rsvd_0_15:16;
-		unsigned long	auto_valid_en:1;		/* RW */
-		unsigned long	rsvd_17_19:3;
-		unsigned long	mmr_hash_index_en:1;		/* RW */
-		unsigned long	rsvd_21_29:9;
-		unsigned long	mmr_write:1;			/* WP */
-		unsigned long	mmr_read:1;			/* WP */
-		unsigned long	mmr_op_done:1;			/* RW */
-		unsigned long	rsvd_33_47:15;
-		unsigned long	rsvd_48:1;
-		unsigned long	rsvd_49_51:3;
-		unsigned long	rsvd_52_63:12;
-	} sx;
-	struct uv2h_gr1_tlb_mmr_control_s {
-		unsigned long	index:12;			/* RW */
-		unsigned long	mem_sel:2;			/* RW */
-		unsigned long	rsvd_14_15:2;
-		unsigned long	auto_valid_en:1;		/* RW */
-		unsigned long	rsvd_17_19:3;
-		unsigned long	mmr_hash_index_en:1;		/* RW */
-		unsigned long	rsvd_21_29:9;
-		unsigned long	mmr_write:1;			/* WP */
-		unsigned long	mmr_read:1;			/* WP */
-		unsigned long	mmr_op_done:1;			/* RW */
-		unsigned long	rsvd_33_47:15;
-		unsigned long	mmr_inj_con:1;			/* RW */
-		unsigned long	rsvd_49_51:3;
-		unsigned long	mmr_inj_tlbram:1;		/* RW */
-		unsigned long	rsvd_53_63:11;
-	} s2;
-	struct uv3h_gr1_tlb_mmr_control_s {
-		unsigned long	index:12;			/* RW */
-		unsigned long	mem_sel:2;			/* RW */
-		unsigned long	rsvd_14_15:2;
-		unsigned long	auto_valid_en:1;		/* RW */
-		unsigned long	rsvd_17_19:3;
-		unsigned long	mmr_hash_index_en:1;		/* RW */
-		unsigned long	ecc_sel:1;			/* RW */
-		unsigned long	rsvd_22_29:8;
-		unsigned long	mmr_write:1;			/* WP */
-		unsigned long	mmr_read:1;			/* WP */
-		unsigned long	mmr_op_done:1;			/* RW */
-		unsigned long	rsvd_33_47:15;
-		unsigned long	undef_48:1;			/* Undefined */
-		unsigned long	rsvd_49_51:3;
-		unsigned long	undef_52:1;			/* Undefined */
-		unsigned long	rsvd_53_63:11;
-	} s3;
-	struct uv4h_gr1_tlb_mmr_control_s {
-		unsigned long	index:13;			/* RW */
-		unsigned long	mem_sel:2;			/* RW */
-		unsigned long	rsvd_15:1;
-		unsigned long	auto_valid_en:1;		/* RW */
-		unsigned long	rsvd_17_19:3;
-		unsigned long	mmr_hash_index_en:1;		/* RW */
-		unsigned long	ecc_sel:1;			/* RW */
-		unsigned long	rsvd_22_29:8;
-		unsigned long	mmr_write:1;			/* WP */
-		unsigned long	mmr_read:1;			/* WP */
-		unsigned long	mmr_op_done:1;			/* RW */
-		unsigned long	rsvd_33_47:15;
-		unsigned long	undef_48:1;			/* Undefined */
-		unsigned long	rsvd_49_51:3;
-		unsigned long	rsvd_52_58:7;
-		unsigned long	page_size:5;			/* RW */
-	} s4;
-};
-
-/* ========================================================================= */
-/*                       UVH_GR1_TLB_MMR_READ_DATA_HI                        */
-/* ========================================================================= */
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI 0x7010a0UL
-#define UVH_GR1_TLB_MMR_READ_DATA_HI (					\
-	is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_HI :			\
-	is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_HI :			\
-	/*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_HI)
-
-#define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-
-#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT	43
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT	44
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK		0x000001ffffffffffUL
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0000060000000000UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK	0x0000080000000000UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0000100000000000UL
-
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT		41
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT	43
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT	44
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT	45
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT	55
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK		0x000001ffffffffffUL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0000060000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK	0x0000080000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0000100000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK	0x0000200000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK	0xff80000000000000UL
-
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT		0
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_SHFT		34
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT		49
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT	51
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT	52
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT	53
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT	55
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK		0x00000003ffffffffUL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_PNID_MASK		0x0001fffc00000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK		0x0006000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK	0x0008000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK	0x0010000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK	0x0020000000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK	0xff80000000000000UL
-
-
-union uvh_gr1_tlb_mmr_read_data_hi_u {
-	unsigned long	v;
-	struct uv2h_gr1_tlb_mmr_read_data_hi_s {
-		unsigned long	pfn:41;				/* RO */
-		unsigned long	gaa:2;				/* RO */
-		unsigned long	dirty:1;			/* RO */
-		unsigned long	larger:1;			/* RO */
-		unsigned long	rsvd_45_63:19;
-	} s2;
-	struct uv3h_gr1_tlb_mmr_read_data_hi_s {
-		unsigned long	pfn:41;				/* RO */
-		unsigned long	gaa:2;				/* RO */
-		unsigned long	dirty:1;			/* RO */
-		unsigned long	larger:1;			/* RO */
-		unsigned long	aa_ext:1;			/* RO */
-		unsigned long	undef_46_54:9;			/* Undefined */
-		unsigned long	way_ecc:9;			/* RO */
 	} s3;
-	struct uv4h_gr1_tlb_mmr_read_data_hi_s {
-		unsigned long	pfn:34;				/* RO */
-		unsigned long	pnid:15;			/* RO */
-		unsigned long	gaa:2;				/* RO */
-		unsigned long	dirty:1;			/* RO */
-		unsigned long	larger:1;			/* RO */
-		unsigned long	aa_ext:1;			/* RO */
-		unsigned long	undef_54:1;			/* Undefined */
-		unsigned long	way_ecc:9;			/* RO */
-	} s4;
-};
 
-/* ========================================================================= */
-/*                       UVH_GR1_TLB_MMR_READ_DATA_LO                        */
-/* ========================================================================= */
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO 0x7010a8UL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO (					\
-	is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO :			\
-	is_uv3_hub() ? UV3H_GR1_TLB_MMR_READ_DATA_LO :			\
-	/*is_uv4_hub*/ UV4H_GR1_TLB_MMR_READ_DATA_LO)
-
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT		63
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
-#define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK		0x8000000000000000UL
-
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT	63
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
-#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
-
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT	63
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
-#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
-
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT	63
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
-#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
-
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT		0
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT		39
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT	63
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK		0x0000007fffffffffUL
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK		0x7fffff8000000000UL
-#define UV4H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK	0x8000000000000000UL
-
-
-union uvh_gr1_tlb_mmr_read_data_lo_u {
-	unsigned long	v;
-	struct uvh_gr1_tlb_mmr_read_data_lo_s {
-		unsigned long	vpn:39;				/* RO */
-		unsigned long	asid:24;			/* RO */
-		unsigned long	valid:1;			/* RO */
-	} s;
-	struct uvxh_gr1_tlb_mmr_read_data_lo_s {
-		unsigned long	vpn:39;				/* RO */
-		unsigned long	asid:24;			/* RO */
-		unsigned long	valid:1;			/* RO */
-	} sx;
-	struct uv2h_gr1_tlb_mmr_read_data_lo_s {
-		unsigned long	vpn:39;				/* RO */
-		unsigned long	asid:24;			/* RO */
-		unsigned long	valid:1;			/* RO */
+	/* UV2 unique struct */
+	struct uv2h_gr0_tlb_int1_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
 	} s2;
-	struct uv3h_gr1_tlb_mmr_read_data_lo_s {
-		unsigned long	vpn:39;				/* RO */
-		unsigned long	asid:24;			/* RO */
-		unsigned long	valid:1;			/* RO */
-	} s3;
-	struct uv4h_gr1_tlb_mmr_read_data_lo_s {
-		unsigned long	vpn:39;				/* RO */
-		unsigned long	asid:24;			/* RO */
-		unsigned long	valid:1;			/* RO */
-	} s4;
 };
 
 /* ========================================================================= */
-/*                               UVH_INT_CMPB                                */
+/*                         UVH_GR1_TLB_INT0_CONFIG                           */
 /* ========================================================================= */
-#define UVH_INT_CMPB 0x22080UL
-
-#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT		0
-#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK		0x00ffffffffffffffUL
+#define UVH_GR1_TLB_INT0_CONFIG (					\
+	is_uv(UV4) ? 0x62100UL :					\
+	is_uv(UV3) ? 0x61f00UL :					\
+	is_uv(UV2) ? 0x61f00UL :					\
+	uv_undefined("UVH_GR1_TLB_INT0_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT		0
+#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_MASK		0x00000000000000ffUL
+#define UVXH_GR1_TLB_INT0_CONFIG_DM_SHFT		8
+#define UVXH_GR1_TLB_INT0_CONFIG_DM_MASK		0x0000000000000700UL
+#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT		11
+#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK		0x0000000000000800UL
+#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_SHFT		12
+#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_MASK		0x0000000000001000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_P_SHFT			13
+#define UVXH_GR1_TLB_INT0_CONFIG_P_MASK			0x0000000000002000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_T_SHFT			15
+#define UVXH_GR1_TLB_INT0_CONFIG_T_MASK			0x0000000000008000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_M_SHFT			16
+#define UVXH_GR1_TLB_INT0_CONFIG_M_MASK			0x0000000000010000UL
+#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT		32
+#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
 
 
-union uvh_int_cmpb_u {
+union uvh_gr1_tlb_int0_config_u {
 	unsigned long	v;
-	struct uvh_int_cmpb_s {
-		unsigned long	real_time_cmpb:56;		/* RW */
-		unsigned long	rsvd_56_63:8;
-	} s;
-};
-
-/* ========================================================================= */
-/*                               UVH_INT_CMPC                                */
-/* ========================================================================= */
-#define UVH_INT_CMPC 0x22100UL
-
-
-#define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT		0
-#define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK		0x00ffffffffffffffUL
-
 
-union uvh_int_cmpc_u {
-	unsigned long	v;
-	struct uvh_int_cmpc_s {
-		unsigned long	real_time_cmpc:56;		/* RW */
-		unsigned long	rsvd_56_63:8;
+	/* UVH common struct */
+	struct uvh_gr1_tlb_int0_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
 	} s;
-};
-
-/* ========================================================================= */
-/*                               UVH_INT_CMPD                                */
-/* ========================================================================= */
-#define UVH_INT_CMPD 0x22180UL
 
+	/* UVXH common struct */
+	struct uvxh_gr1_tlb_int0_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} sx;
 
-#define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT		0
-#define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK		0x00ffffffffffffffUL
+	/* UV4 unique struct */
+	struct uv4h_gr1_tlb_int0_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s4;
 
+	/* UV3 unique struct */
+	struct uv3h_gr1_tlb_int0_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s3;
 
-union uvh_int_cmpd_u {
-	unsigned long	v;
-	struct uvh_int_cmpd_s {
-		unsigned long	real_time_cmpd:56;		/* RW */
-		unsigned long	rsvd_56_63:8;
-	} s;
+	/* UV2 unique struct */
+	struct uv2h_gr1_tlb_int0_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s2;
 };
 
 /* ========================================================================= */
-/*                               UVH_IPI_INT                                 */
+/*                         UVH_GR1_TLB_INT1_CONFIG                           */
 /* ========================================================================= */
-#define UVH_IPI_INT 0x60500UL
-
-#define UV2H_IPI_INT_32 0x348
-#define UV3H_IPI_INT_32 0x348
-#define UV4H_IPI_INT_32 0x268
-#define UVH_IPI_INT_32 (						\
-	is_uv2_hub() ? UV2H_IPI_INT_32 :				\
-	is_uv3_hub() ? UV3H_IPI_INT_32 :				\
-	/*is_uv4_hub*/ UV4H_IPI_INT_32)
-
-#define UVH_IPI_INT_VECTOR_SHFT				0
-#define UVH_IPI_INT_DELIVERY_MODE_SHFT			8
-#define UVH_IPI_INT_DESTMODE_SHFT			11
-#define UVH_IPI_INT_APIC_ID_SHFT			16
-#define UVH_IPI_INT_SEND_SHFT				63
-#define UVH_IPI_INT_VECTOR_MASK				0x00000000000000ffUL
-#define UVH_IPI_INT_DELIVERY_MODE_MASK			0x0000000000000700UL
-#define UVH_IPI_INT_DESTMODE_MASK			0x0000000000000800UL
-#define UVH_IPI_INT_APIC_ID_MASK			0x0000ffffffff0000UL
-#define UVH_IPI_INT_SEND_MASK				0x8000000000000000UL
+#define UVH_GR1_TLB_INT1_CONFIG (					\
+	is_uv(UV4) ? 0x62140UL :					\
+	is_uv(UV3) ? 0x61f40UL :					\
+	is_uv(UV2) ? 0x61f40UL :					\
+	uv_undefined("UVH_GR1_TLB_INT1_CONFIG"))
+
+
+/* UVXH common defines */
+#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT		0
+#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_MASK		0x00000000000000ffUL
+#define UVXH_GR1_TLB_INT1_CONFIG_DM_SHFT		8
+#define UVXH_GR1_TLB_INT1_CONFIG_DM_MASK		0x0000000000000700UL
+#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT		11
+#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK		0x0000000000000800UL
+#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_SHFT		12
+#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_MASK		0x0000000000001000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_P_SHFT			13
+#define UVXH_GR1_TLB_INT1_CONFIG_P_MASK			0x0000000000002000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_T_SHFT			15
+#define UVXH_GR1_TLB_INT1_CONFIG_T_MASK			0x0000000000008000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_M_SHFT			16
+#define UVXH_GR1_TLB_INT1_CONFIG_M_MASK			0x0000000000010000UL
+#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT		32
+#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
 
 
-union uvh_ipi_int_u {
+union uvh_gr1_tlb_int1_config_u {
 	unsigned long	v;
-	struct uvh_ipi_int_s {
+
+	/* UVH common struct */
+	struct uvh_gr1_tlb_int1_config_s {
 		unsigned long	vector_:8;			/* RW */
-		unsigned long	delivery_mode:3;		/* RW */
+		unsigned long	dm:3;				/* RW */
 		unsigned long	destmode:1;			/* RW */
-		unsigned long	rsvd_12_15:4;
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
 		unsigned long	apic_id:32;			/* RW */
-		unsigned long	rsvd_48_62:15;
-		unsigned long	send:1;				/* WP */
 	} s;
-};
-
-/* ========================================================================= */
-/*                   UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST                     */
-/* ========================================================================= */
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL
-#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST")
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST (				\
-	is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST :		\
-	is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST)
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0
 
+	/* UVXH common struct */
+	struct uvxh_gr1_tlb_int1_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} sx;
 
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
-
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL
+	/* UV4 unique struct */
+	struct uv4h_gr1_tlb_int1_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s4;
 
+	/* UV3 unique struct */
+	struct uv3h_gr1_tlb_int1_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s3;
 
-union uvh_lb_bau_intd_payload_queue_first_u {
-	unsigned long	v;
-	struct uv2h_lb_bau_intd_payload_queue_first_s {
-		unsigned long	rsvd_0_3:4;
-		unsigned long	address:39;			/* RW */
-		unsigned long	rsvd_43_48:6;
-		unsigned long	node_id:14;			/* RW */
-		unsigned long	rsvd_63:1;
+	/* UV2 unique struct */
+	struct uv2h_gr1_tlb_int1_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
 	} s2;
-	struct uv3h_lb_bau_intd_payload_queue_first_s {
-		unsigned long	rsvd_0_3:4;
-		unsigned long	address:39;			/* RW */
-		unsigned long	rsvd_43_48:6;
-		unsigned long	node_id:14;			/* RW */
-		unsigned long	rsvd_63:1;
-	} s3;
 };
 
 /* ========================================================================= */
-/*                    UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST                     */
+/*                               UVH_INT_CMPB                                */
 /* ========================================================================= */
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL
-#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST")
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST (				\
-	is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST :		\
-	is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST)
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8
-
-
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+#define UVH_INT_CMPB 0x22080UL
 
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
+/* UVH common defines*/
+#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT		0
+#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK		0x00ffffffffffffffUL
 
 
-union uvh_lb_bau_intd_payload_queue_last_u {
+union uvh_int_cmpb_u {
 	unsigned long	v;
-	struct uv2h_lb_bau_intd_payload_queue_last_s {
-		unsigned long	rsvd_0_3:4;
-		unsigned long	address:39;			/* RW */
-		unsigned long	rsvd_43_63:21;
-	} s2;
-	struct uv3h_lb_bau_intd_payload_queue_last_s {
-		unsigned long	rsvd_0_3:4;
-		unsigned long	address:39;			/* RW */
-		unsigned long	rsvd_43_63:21;
-	} s3;
-};
-
-/* ========================================================================= */
-/*                    UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL                     */
-/* ========================================================================= */
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL uv_undefined("UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL")
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL (				\
-	is_uv2_hub() ? UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL :		\
-	is_uv3_hub() ? UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL)
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0
-
 
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
-
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
+	/* UVH common struct */
+	struct uvh_int_cmpb_s {
+		unsigned long	real_time_cmpb:56;		/* RW */
+		unsigned long	rsvd_56_63:8;
+	} s;
 
+	/* UV5 unique struct */
+	struct uv5h_int_cmpb_s {
+		unsigned long	real_time_cmpb:56;		/* RW */
+		unsigned long	rsvd_56_63:8;
+	} s5;
 
-union uvh_lb_bau_intd_payload_queue_tail_u {
-	unsigned long	v;
-	struct uv2h_lb_bau_intd_payload_queue_tail_s {
-		unsigned long	rsvd_0_3:4;
-		unsigned long	address:39;			/* RW */
-		unsigned long	rsvd_43_63:21;
-	} s2;
-	struct uv3h_lb_bau_intd_payload_queue_tail_s {
-		unsigned long	rsvd_0_3:4;
-		unsigned long	address:39;			/* RW */
-		unsigned long	rsvd_43_63:21;
-	} s3;
-};
+	/* UV4 unique struct */
+	struct uv4h_int_cmpb_s {
+		unsigned long	real_time_cmpb:56;		/* RW */
+		unsigned long	rsvd_56_63:8;
+	} s4;
 
-/* ========================================================================= */
-/*                   UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE                    */
-/* ========================================================================= */
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL
-#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE")
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE (				\
-	is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE :		\
-	is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE)
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68
-
-
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
-
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL
-
-
-union uvh_lb_bau_intd_software_acknowledge_u {
-	unsigned long	v;
-	struct uv2h_lb_bau_intd_software_acknowledge_s {
-		unsigned long	pending_0:1;			/* RW */
-		unsigned long	pending_1:1;			/* RW */
-		unsigned long	pending_2:1;			/* RW */
-		unsigned long	pending_3:1;			/* RW */
-		unsigned long	pending_4:1;			/* RW */
-		unsigned long	pending_5:1;			/* RW */
-		unsigned long	pending_6:1;			/* RW */
-		unsigned long	pending_7:1;			/* RW */
-		unsigned long	timeout_0:1;			/* RW */
-		unsigned long	timeout_1:1;			/* RW */
-		unsigned long	timeout_2:1;			/* RW */
-		unsigned long	timeout_3:1;			/* RW */
-		unsigned long	timeout_4:1;			/* RW */
-		unsigned long	timeout_5:1;			/* RW */
-		unsigned long	timeout_6:1;			/* RW */
-		unsigned long	timeout_7:1;			/* RW */
-		unsigned long	rsvd_16_63:48;
-	} s2;
-	struct uv3h_lb_bau_intd_software_acknowledge_s {
-		unsigned long	pending_0:1;			/* RW */
-		unsigned long	pending_1:1;			/* RW */
-		unsigned long	pending_2:1;			/* RW */
-		unsigned long	pending_3:1;			/* RW */
-		unsigned long	pending_4:1;			/* RW */
-		unsigned long	pending_5:1;			/* RW */
-		unsigned long	pending_6:1;			/* RW */
-		unsigned long	pending_7:1;			/* RW */
-		unsigned long	timeout_0:1;			/* RW */
-		unsigned long	timeout_1:1;			/* RW */
-		unsigned long	timeout_2:1;			/* RW */
-		unsigned long	timeout_3:1;			/* RW */
-		unsigned long	timeout_4:1;			/* RW */
-		unsigned long	timeout_5:1;			/* RW */
-		unsigned long	timeout_6:1;			/* RW */
-		unsigned long	timeout_7:1;			/* RW */
-		unsigned long	rsvd_16_63:48;
+	/* UV3 unique struct */
+	struct uv3h_int_cmpb_s {
+		unsigned long	real_time_cmpb:56;		/* RW */
+		unsigned long	rsvd_56_63:8;
 	} s3;
-};
-
-/* ========================================================================= */
-/*                UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS                 */
-/* ========================================================================= */
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
-#define UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS uv_undefined("UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS")
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS (			\
-	is_uv2_hub() ? UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS :	\
-	is_uv3_hub() ? UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS :	\
-	/*is_uv4_hub*/ UV4H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS)
-#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
 
-
-/* ========================================================================= */
-/*                         UVH_LB_BAU_MISC_CONTROL                           */
-/* ========================================================================= */
-#define UV2H_LB_BAU_MISC_CONTROL 0x320170UL
-#define UV3H_LB_BAU_MISC_CONTROL 0x320170UL
-#define UV4H_LB_BAU_MISC_CONTROL 0xc8170UL
-#define UVH_LB_BAU_MISC_CONTROL (					\
-	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL :			\
-	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL :			\
-	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL)
-
-#define UV2H_LB_BAU_MISC_CONTROL_32 0xa10
-#define UV3H_LB_BAU_MISC_CONTROL_32 0xa10
-#define UV4H_LB_BAU_MISC_CONTROL_32 0xa18
-#define UVH_LB_BAU_MISC_CONTROL_32 (					\
-	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_32 :			\
-	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_32 :			\
-	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_32)
-
-#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT	0
-#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT		8
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT	9
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT	10
-#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT		48
-#define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK	0x00000000000000ffUL
-#define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK		0x0000000000000100UL
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK	0x0000000000000200UL
-#define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK	0x0000000000000400UL
-#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK		0xffff000000000000UL
-
-#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT	0
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT		8
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT	9
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT	10
-#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT	30
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UVXH_LB_BAU_MISC_CONTROL_FUN_SHFT		48
-#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK	0x00000000000000ffUL
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK		0x0000000000000100UL
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK	0x0000000000000200UL
-#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK	0x0000000000000400UL
-#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK	0x0000000040000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UVXH_LB_BAU_MISC_CONTROL_FUN_MASK		0xffff000000000000UL
-
-#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT	0
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT		8
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT	9
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT	10
-#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT	30
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UV2H_LB_BAU_MISC_CONTROL_FUN_SHFT		48
-#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK	0x00000000000000ffUL
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK		0x0000000000000100UL
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK	0x0000000000000200UL
-#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK	0x0000000000000400UL
-#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK	0x0000000040000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK		0xffff000000000000UL
-
-#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT	0
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT		8
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT	9
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT	10
-#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15
-#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT	30
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT 37
-#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
-#define UV3H_LB_BAU_MISC_CONTROL_FUN_SHFT		48
-#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK	0x00000000000000ffUL
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK		0x0000000000000100UL
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK	0x0000000000000200UL
-#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK	0x0000000000000400UL
-#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL
-#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK	0x0000000040000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_MASK 0x0000002000000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
-#define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK		0xffff000000000000UL
-
-#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT	0
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT		8
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT	9
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT	10
-#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11
-#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_SHFT	15
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23
-#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24
-#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT	30
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34
-#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_SHFT	37
-#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38
-#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_SHFT 46
-#define UV4H_LB_BAU_MISC_CONTROL_FUN_SHFT		48
-#define UV4H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK	0x00000000000000ffUL
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK		0x0000000000000100UL
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK	0x0000000000000200UL
-#define UV4H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK	0x0000000000000400UL
-#define UV4H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL
-#define UV4H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_15_19_MASK	0x00000000000f8000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL
-#define UV4H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL
-#define UV4H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK	0x0000000040000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_RESERVED_37_MASK	0x0000002000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_ADDRESS_INTERLEAVE_SELECT_MASK 0x0000400000000000UL
-#define UV4H_LB_BAU_MISC_CONTROL_FUN_MASK		0xffff000000000000UL
-
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK	\
-	uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK")
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK (	\
-	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
-	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK : \
-	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK)
-#define UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT	\
-	uv_undefined("UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT")
-#define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT (	\
-	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
-	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT : \
-	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT)
-#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK	\
-	uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK")
-#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK (	\
-	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
-	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK : \
-	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK)
-#define UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT	\
-	uv_undefined("UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT")
-#define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT (	\
-	is_uv2_hub() ? UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
-	is_uv3_hub() ? UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT : \
-	/*is_uv4_hub*/ UV4H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT)
-
-union uvh_lb_bau_misc_control_u {
-	unsigned long	v;
-	struct uvh_lb_bau_misc_control_s {
-		unsigned long	rejection_delay:8;		/* RW */
-		unsigned long	apic_mode:1;			/* RW */
-		unsigned long	force_broadcast:1;		/* RW */
-		unsigned long	force_lock_nop:1;		/* RW */
-		unsigned long	qpi_agent_presence_vector:3;	/* RW */
-		unsigned long	descriptor_fetch_mode:1;	/* RW */
-		unsigned long	rsvd_15_19:5;
-		unsigned long	enable_dual_mapping_mode:1;	/* RW */
-		unsigned long	vga_io_port_decode_enable:1;	/* RW */
-		unsigned long	vga_io_port_16_bit_decode:1;	/* RW */
-		unsigned long	suppress_dest_registration:1;	/* RW */
-		unsigned long	programmed_initial_priority:3;	/* RW */
-		unsigned long	use_incoming_priority:1;	/* RW */
-		unsigned long	enable_programmed_initial_priority:1;/* RW */
-		unsigned long	rsvd_29_47:19;
-		unsigned long	fun:16;				/* RW */
-	} s;
-	struct uvxh_lb_bau_misc_control_s {
-		unsigned long	rejection_delay:8;		/* RW */
-		unsigned long	apic_mode:1;			/* RW */
-		unsigned long	force_broadcast:1;		/* RW */
-		unsigned long	force_lock_nop:1;		/* RW */
-		unsigned long	qpi_agent_presence_vector:3;	/* RW */
-		unsigned long	descriptor_fetch_mode:1;	/* RW */
-		unsigned long	rsvd_15_19:5;
-		unsigned long	enable_dual_mapping_mode:1;	/* RW */
-		unsigned long	vga_io_port_decode_enable:1;	/* RW */
-		unsigned long	vga_io_port_16_bit_decode:1;	/* RW */
-		unsigned long	suppress_dest_registration:1;	/* RW */
-		unsigned long	programmed_initial_priority:3;	/* RW */
-		unsigned long	use_incoming_priority:1;	/* RW */
-		unsigned long	enable_programmed_initial_priority:1;/* RW */
-		unsigned long	enable_automatic_apic_mode_selection:1;/* RW */
-		unsigned long	apic_mode_status:1;		/* RO */
-		unsigned long	suppress_interrupts_to_self:1;	/* RW */
-		unsigned long	enable_lock_based_system_flush:1;/* RW */
-		unsigned long	enable_extended_sb_status:1;	/* RW */
-		unsigned long	suppress_int_prio_udt_to_self:1;/* RW */
-		unsigned long	use_legacy_descriptor_formats:1;/* RW */
-		unsigned long	rsvd_36_47:12;
-		unsigned long	fun:16;				/* RW */
-	} sx;
-	struct uv2h_lb_bau_misc_control_s {
-		unsigned long	rejection_delay:8;		/* RW */
-		unsigned long	apic_mode:1;			/* RW */
-		unsigned long	force_broadcast:1;		/* RW */
-		unsigned long	force_lock_nop:1;		/* RW */
-		unsigned long	qpi_agent_presence_vector:3;	/* RW */
-		unsigned long	descriptor_fetch_mode:1;	/* RW */
-		unsigned long	enable_intd_soft_ack_mode:1;	/* RW */
-		unsigned long	intd_soft_ack_timeout_period:4;	/* RW */
-		unsigned long	enable_dual_mapping_mode:1;	/* RW */
-		unsigned long	vga_io_port_decode_enable:1;	/* RW */
-		unsigned long	vga_io_port_16_bit_decode:1;	/* RW */
-		unsigned long	suppress_dest_registration:1;	/* RW */
-		unsigned long	programmed_initial_priority:3;	/* RW */
-		unsigned long	use_incoming_priority:1;	/* RW */
-		unsigned long	enable_programmed_initial_priority:1;/* RW */
-		unsigned long	enable_automatic_apic_mode_selection:1;/* RW */
-		unsigned long	apic_mode_status:1;		/* RO */
-		unsigned long	suppress_interrupts_to_self:1;	/* RW */
-		unsigned long	enable_lock_based_system_flush:1;/* RW */
-		unsigned long	enable_extended_sb_status:1;	/* RW */
-		unsigned long	suppress_int_prio_udt_to_self:1;/* RW */
-		unsigned long	use_legacy_descriptor_formats:1;/* RW */
-		unsigned long	rsvd_36_47:12;
-		unsigned long	fun:16;				/* RW */
+	/* UV2 unique struct */
+	struct uv2h_int_cmpb_s {
+		unsigned long	real_time_cmpb:56;		/* RW */
+		unsigned long	rsvd_56_63:8;
 	} s2;
-	struct uv3h_lb_bau_misc_control_s {
-		unsigned long	rejection_delay:8;		/* RW */
-		unsigned long	apic_mode:1;			/* RW */
-		unsigned long	force_broadcast:1;		/* RW */
-		unsigned long	force_lock_nop:1;		/* RW */
-		unsigned long	qpi_agent_presence_vector:3;	/* RW */
-		unsigned long	descriptor_fetch_mode:1;	/* RW */
-		unsigned long	enable_intd_soft_ack_mode:1;	/* RW */
-		unsigned long	intd_soft_ack_timeout_period:4;	/* RW */
-		unsigned long	enable_dual_mapping_mode:1;	/* RW */
-		unsigned long	vga_io_port_decode_enable:1;	/* RW */
-		unsigned long	vga_io_port_16_bit_decode:1;	/* RW */
-		unsigned long	suppress_dest_registration:1;	/* RW */
-		unsigned long	programmed_initial_priority:3;	/* RW */
-		unsigned long	use_incoming_priority:1;	/* RW */
-		unsigned long	enable_programmed_initial_priority:1;/* RW */
-		unsigned long	enable_automatic_apic_mode_selection:1;/* RW */
-		unsigned long	apic_mode_status:1;		/* RO */
-		unsigned long	suppress_interrupts_to_self:1;	/* RW */
-		unsigned long	enable_lock_based_system_flush:1;/* RW */
-		unsigned long	enable_extended_sb_status:1;	/* RW */
-		unsigned long	suppress_int_prio_udt_to_self:1;/* RW */
-		unsigned long	use_legacy_descriptor_formats:1;/* RW */
-		unsigned long	suppress_quiesce_msgs_to_qpi:1;	/* RW */
-		unsigned long	enable_intd_prefetch_hint:1;	/* RW */
-		unsigned long	thread_kill_timebase:8;		/* RW */
-		unsigned long	rsvd_46_47:2;
-		unsigned long	fun:16;				/* RW */
-	} s3;
-	struct uv4h_lb_bau_misc_control_s {
-		unsigned long	rejection_delay:8;		/* RW */
-		unsigned long	apic_mode:1;			/* RW */
-		unsigned long	force_broadcast:1;		/* RW */
-		unsigned long	force_lock_nop:1;		/* RW */
-		unsigned long	qpi_agent_presence_vector:3;	/* RW */
-		unsigned long	descriptor_fetch_mode:1;	/* RW */
-		unsigned long	rsvd_15_19:5;
-		unsigned long	enable_dual_mapping_mode:1;	/* RW */
-		unsigned long	vga_io_port_decode_enable:1;	/* RW */
-		unsigned long	vga_io_port_16_bit_decode:1;	/* RW */
-		unsigned long	suppress_dest_registration:1;	/* RW */
-		unsigned long	programmed_initial_priority:3;	/* RW */
-		unsigned long	use_incoming_priority:1;	/* RW */
-		unsigned long	enable_programmed_initial_priority:1;/* RW */
-		unsigned long	enable_automatic_apic_mode_selection:1;/* RW */
-		unsigned long	apic_mode_status:1;		/* RO */
-		unsigned long	suppress_interrupts_to_self:1;	/* RW */
-		unsigned long	enable_lock_based_system_flush:1;/* RW */
-		unsigned long	enable_extended_sb_status:1;	/* RW */
-		unsigned long	suppress_int_prio_udt_to_self:1;/* RW */
-		unsigned long	use_legacy_descriptor_formats:1;/* RW */
-		unsigned long	suppress_quiesce_msgs_to_qpi:1;	/* RW */
-		unsigned long	rsvd_37:1;
-		unsigned long	thread_kill_timebase:8;		/* RW */
-		unsigned long	address_interleave_select:1;	/* RW */
-		unsigned long	rsvd_47:1;
-		unsigned long	fun:16;				/* RW */
-	} s4;
 };
 
 /* ========================================================================= */
-/*                     UVH_LB_BAU_SB_ACTIVATION_CONTROL                      */
+/*                               UVH_IPI_INT                                 */
 /* ========================================================================= */
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL 0xc8020UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL (				\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL :		\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL)
-
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
-#define UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9c8
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 (				\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 :		\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_CONTROL_32)
-
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT	0
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT	62
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT	63
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK	0x000000000000003fUL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK	0x4000000000000000UL
-#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK	0x8000000000000000UL
-
-
-union uvh_lb_bau_sb_activation_control_u {
-	unsigned long	v;
-	struct uvh_lb_bau_sb_activation_control_s {
-		unsigned long	index:6;			/* RW */
-		unsigned long	rsvd_6_61:56;
-		unsigned long	push:1;				/* WP */
-		unsigned long	init:1;				/* WP */
-	} s;
-};
+#define UVH_IPI_INT 0x60500UL
 
-/* ========================================================================= */
-/*                    UVH_LB_BAU_SB_ACTIVATION_STATUS_0                      */
-/* ========================================================================= */
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0 0xc8030UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 (				\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 :		\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0)
-
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9d0
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 (				\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 :		\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_0_32)
-
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT	0
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK	0xffffffffffffffffUL
-
-
-union uvh_lb_bau_sb_activation_status_0_u {
-	unsigned long	v;
-	struct uvh_lb_bau_sb_activation_status_0_s {
-		unsigned long	status:64;			/* RW */
-	} s;
-};
+/* UVH common defines*/
+#define UVH_IPI_INT_VECTOR_SHFT				0
+#define UVH_IPI_INT_VECTOR_MASK				0x00000000000000ffUL
+#define UVH_IPI_INT_DELIVERY_MODE_SHFT			8
+#define UVH_IPI_INT_DELIVERY_MODE_MASK			0x0000000000000700UL
+#define UVH_IPI_INT_DESTMODE_SHFT			11
+#define UVH_IPI_INT_DESTMODE_MASK			0x0000000000000800UL
+#define UVH_IPI_INT_APIC_ID_SHFT			16
+#define UVH_IPI_INT_APIC_ID_MASK			0x0000ffffffff0000UL
+#define UVH_IPI_INT_SEND_SHFT				63
+#define UVH_IPI_INT_SEND_MASK				0x8000000000000000UL
 
-/* ========================================================================= */
-/*                    UVH_LB_BAU_SB_ACTIVATION_STATUS_1                      */
-/* ========================================================================= */
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1 0xc8040UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 (				\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 :		\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1)
-
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9d8
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 (				\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 :		\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_1_32)
-
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT	0
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK	0xffffffffffffffffUL
-
-
-union uvh_lb_bau_sb_activation_status_1_u {
+
+union uvh_ipi_int_u {
 	unsigned long	v;
-	struct uvh_lb_bau_sb_activation_status_1_s {
-		unsigned long	status:64;			/* RW */
+
+	/* UVH common struct */
+	struct uvh_ipi_int_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	delivery_mode:3;		/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	rsvd_12_15:4;
+		unsigned long	apic_id:32;			/* RW */
+		unsigned long	rsvd_48_62:15;
+		unsigned long	send:1;				/* WP */
 	} s;
-};
 
-/* ========================================================================= */
-/*                      UVH_LB_BAU_SB_DESCRIPTOR_BASE                        */
-/* ========================================================================= */
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE 0xc8010UL
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE (					\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE :			\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE :			\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE)
-
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9c0
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 (				\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 :		\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_32)
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT	12
-
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT	49
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
-#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK	0x7ffe000000000000UL
-
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT	49
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL
-#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK	0x7ffe000000000000UL
-
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT	49
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x00003ffffffff000UL
-#define UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK	0x7ffe000000000000UL
-
-#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT	53
-#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000ffffffffff000UL
-#define UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK	0xffe0000000000000UL
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT (			\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT :	\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT :	\
-	is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT :	\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT)
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK (			\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK :	\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK :	\
-	is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK :	\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_PAGE_ADDRESS_MASK)
-
-#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK (			\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK :	\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK :	\
-	is_uv4a_hub() ? UV4AH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK :	\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK)
+	/* UV5 unique struct */
+	struct uv5h_ipi_int_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	delivery_mode:3;		/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	rsvd_12_15:4;
+		unsigned long	apic_id:32;			/* RW */
+		unsigned long	rsvd_48_62:15;
+		unsigned long	send:1;				/* WP */
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_ipi_int_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	delivery_mode:3;		/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	rsvd_12_15:4;
+		unsigned long	apic_id:32;			/* RW */
+		unsigned long	rsvd_48_62:15;
+		unsigned long	send:1;				/* WP */
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_ipi_int_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	delivery_mode:3;		/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	rsvd_12_15:4;
+		unsigned long	apic_id:32;			/* RW */
+		unsigned long	rsvd_48_62:15;
+		unsigned long	send:1;				/* WP */
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_ipi_int_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	delivery_mode:3;		/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	rsvd_12_15:4;
+		unsigned long	apic_id:32;			/* RW */
+		unsigned long	rsvd_48_62:15;
+		unsigned long	send:1;				/* WP */
+	} s2;
+};
 
 /* ========================================================================= */
 /*                               UVH_NODE_ID                                 */
 /* ========================================================================= */
 #define UVH_NODE_ID 0x0UL
-#define UV2H_NODE_ID 0x0UL
-#define UV3H_NODE_ID 0x0UL
-#define UV4H_NODE_ID 0x0UL
 
+/* UVH common defines*/
 #define UVH_NODE_ID_FORCE1_SHFT				0
-#define UVH_NODE_ID_MANUFACTURER_SHFT			1
-#define UVH_NODE_ID_PART_NUMBER_SHFT			12
-#define UVH_NODE_ID_REVISION_SHFT			28
-#define UVH_NODE_ID_NODE_ID_SHFT			32
 #define UVH_NODE_ID_FORCE1_MASK				0x0000000000000001UL
+#define UVH_NODE_ID_MANUFACTURER_SHFT			1
 #define UVH_NODE_ID_MANUFACTURER_MASK			0x0000000000000ffeUL
+#define UVH_NODE_ID_PART_NUMBER_SHFT			12
 #define UVH_NODE_ID_PART_NUMBER_MASK			0x000000000ffff000UL
+#define UVH_NODE_ID_REVISION_SHFT			28
 #define UVH_NODE_ID_REVISION_MASK			0x00000000f0000000UL
-#define UVH_NODE_ID_NODE_ID_MASK			0x00007fff00000000UL
+#define UVH_NODE_ID_NODE_ID_SHFT			32
+#define UVH_NODE_ID_NI_PORT_SHFT			57
 
-#define UVXH_NODE_ID_FORCE1_SHFT			0
-#define UVXH_NODE_ID_MANUFACTURER_SHFT			1
-#define UVXH_NODE_ID_PART_NUMBER_SHFT			12
-#define UVXH_NODE_ID_REVISION_SHFT			28
-#define UVXH_NODE_ID_NODE_ID_SHFT			32
-#define UVXH_NODE_ID_NODES_PER_BIT_SHFT			50
-#define UVXH_NODE_ID_NI_PORT_SHFT			57
-#define UVXH_NODE_ID_FORCE1_MASK			0x0000000000000001UL
-#define UVXH_NODE_ID_MANUFACTURER_MASK			0x0000000000000ffeUL
-#define UVXH_NODE_ID_PART_NUMBER_MASK			0x000000000ffff000UL
-#define UVXH_NODE_ID_REVISION_MASK			0x00000000f0000000UL
+/* UVXH common defines */
 #define UVXH_NODE_ID_NODE_ID_MASK			0x00007fff00000000UL
+#define UVXH_NODE_ID_NODES_PER_BIT_SHFT			50
 #define UVXH_NODE_ID_NODES_PER_BIT_MASK			0x01fc000000000000UL
 #define UVXH_NODE_ID_NI_PORT_MASK			0x3e00000000000000UL
 
-#define UV2H_NODE_ID_FORCE1_SHFT			0
-#define UV2H_NODE_ID_MANUFACTURER_SHFT			1
-#define UV2H_NODE_ID_PART_NUMBER_SHFT			12
-#define UV2H_NODE_ID_REVISION_SHFT			28
-#define UV2H_NODE_ID_NODE_ID_SHFT			32
-#define UV2H_NODE_ID_NODES_PER_BIT_SHFT			50
-#define UV2H_NODE_ID_NI_PORT_SHFT			57
-#define UV2H_NODE_ID_FORCE1_MASK			0x0000000000000001UL
-#define UV2H_NODE_ID_MANUFACTURER_MASK			0x0000000000000ffeUL
-#define UV2H_NODE_ID_PART_NUMBER_MASK			0x000000000ffff000UL
-#define UV2H_NODE_ID_REVISION_MASK			0x00000000f0000000UL
-#define UV2H_NODE_ID_NODE_ID_MASK			0x00007fff00000000UL
-#define UV2H_NODE_ID_NODES_PER_BIT_MASK			0x01fc000000000000UL
-#define UV2H_NODE_ID_NI_PORT_MASK			0x3e00000000000000UL
-
-#define UV3H_NODE_ID_FORCE1_SHFT			0
-#define UV3H_NODE_ID_MANUFACTURER_SHFT			1
-#define UV3H_NODE_ID_PART_NUMBER_SHFT			12
-#define UV3H_NODE_ID_REVISION_SHFT			28
-#define UV3H_NODE_ID_NODE_ID_SHFT			32
-#define UV3H_NODE_ID_ROUTER_SELECT_SHFT			48
-#define UV3H_NODE_ID_RESERVED_2_SHFT			49
-#define UV3H_NODE_ID_NODES_PER_BIT_SHFT			50
-#define UV3H_NODE_ID_NI_PORT_SHFT			57
-#define UV3H_NODE_ID_FORCE1_MASK			0x0000000000000001UL
-#define UV3H_NODE_ID_MANUFACTURER_MASK			0x0000000000000ffeUL
-#define UV3H_NODE_ID_PART_NUMBER_MASK			0x000000000ffff000UL
-#define UV3H_NODE_ID_REVISION_MASK			0x00000000f0000000UL
-#define UV3H_NODE_ID_NODE_ID_MASK			0x00007fff00000000UL
-#define UV3H_NODE_ID_ROUTER_SELECT_MASK			0x0001000000000000UL
-#define UV3H_NODE_ID_RESERVED_2_MASK			0x0002000000000000UL
-#define UV3H_NODE_ID_NODES_PER_BIT_MASK			0x01fc000000000000UL
-#define UV3H_NODE_ID_NI_PORT_MASK			0x3e00000000000000UL
-
-#define UV4H_NODE_ID_FORCE1_SHFT			0
-#define UV4H_NODE_ID_MANUFACTURER_SHFT			1
-#define UV4H_NODE_ID_PART_NUMBER_SHFT			12
-#define UV4H_NODE_ID_REVISION_SHFT			28
-#define UV4H_NODE_ID_NODE_ID_SHFT			32
+/* UVYH common defines */
+#define UVYH_NODE_ID_NODE_ID_MASK			0x0000007f00000000UL
+#define UVYH_NODE_ID_NI_PORT_MASK			0x7e00000000000000UL
+
+/* UV4 unique defines */
 #define UV4H_NODE_ID_ROUTER_SELECT_SHFT			48
-#define UV4H_NODE_ID_RESERVED_2_SHFT			49
-#define UV4H_NODE_ID_NODES_PER_BIT_SHFT			50
-#define UV4H_NODE_ID_NI_PORT_SHFT			57
-#define UV4H_NODE_ID_FORCE1_MASK			0x0000000000000001UL
-#define UV4H_NODE_ID_MANUFACTURER_MASK			0x0000000000000ffeUL
-#define UV4H_NODE_ID_PART_NUMBER_MASK			0x000000000ffff000UL
-#define UV4H_NODE_ID_REVISION_MASK			0x00000000f0000000UL
-#define UV4H_NODE_ID_NODE_ID_MASK			0x00007fff00000000UL
 #define UV4H_NODE_ID_ROUTER_SELECT_MASK			0x0001000000000000UL
+#define UV4H_NODE_ID_RESERVED_2_SHFT			49
 #define UV4H_NODE_ID_RESERVED_2_MASK			0x0002000000000000UL
-#define UV4H_NODE_ID_NODES_PER_BIT_MASK			0x01fc000000000000UL
-#define UV4H_NODE_ID_NI_PORT_MASK			0x3e00000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_NODE_ID_ROUTER_SELECT_SHFT			48
+#define UV3H_NODE_ID_ROUTER_SELECT_MASK			0x0001000000000000UL
+#define UV3H_NODE_ID_RESERVED_2_SHFT			49
+#define UV3H_NODE_ID_RESERVED_2_MASK			0x0002000000000000UL
 
 
 union uvh_node_id_u {
 	unsigned long	v;
+
+	/* UVH common struct */
 	struct uvh_node_id_s {
 		unsigned long	force1:1;			/* RO */
 		unsigned long	manufacturer:11;		/* RO */
 		unsigned long	part_number:16;			/* RO */
 		unsigned long	revision:4;			/* RO */
-		unsigned long	node_id:15;			/* RW */
-		unsigned long	rsvd_47_63:17;
+		unsigned long	rsvd_32_63:32;
 	} s;
+
+	/* UVXH common struct */
 	struct uvxh_node_id_s {
 		unsigned long	force1:1;			/* RO */
 		unsigned long	manufacturer:11;		/* RO */
@@ -2444,17 +2840,47 @@ union uvh_node_id_u {
 		unsigned long	ni_port:5;			/* RO */
 		unsigned long	rsvd_62_63:2;
 	} sx;
-	struct uv2h_node_id_s {
+
+	/* UVYH common struct */
+	struct uvyh_node_id_s {
+		unsigned long	force1:1;			/* RO */
+		unsigned long	manufacturer:11;		/* RO */
+		unsigned long	part_number:16;			/* RO */
+		unsigned long	revision:4;			/* RO */
+		unsigned long	node_id:7;			/* RW */
+		unsigned long	rsvd_39_56:18;
+		unsigned long	ni_port:6;			/* RO */
+		unsigned long	rsvd_63:1;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_node_id_s {
+		unsigned long	force1:1;			/* RO */
+		unsigned long	manufacturer:11;		/* RO */
+		unsigned long	part_number:16;			/* RO */
+		unsigned long	revision:4;			/* RO */
+		unsigned long	node_id:7;			/* RW */
+		unsigned long	rsvd_39_56:18;
+		unsigned long	ni_port:6;			/* RO */
+		unsigned long	rsvd_63:1;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_node_id_s {
 		unsigned long	force1:1;			/* RO */
 		unsigned long	manufacturer:11;		/* RO */
 		unsigned long	part_number:16;			/* RO */
 		unsigned long	revision:4;			/* RO */
 		unsigned long	node_id:15;			/* RW */
-		unsigned long	rsvd_47_49:3;
+		unsigned long	rsvd_47:1;
+		unsigned long	router_select:1;		/* RO */
+		unsigned long	rsvd_49:1;
 		unsigned long	nodes_per_bit:7;		/* RO */
 		unsigned long	ni_port:5;			/* RO */
 		unsigned long	rsvd_62_63:2;
-	} s2;
+	} s4;
+
+	/* UV3 unique struct */
 	struct uv3h_node_id_s {
 		unsigned long	force1:1;			/* RO */
 		unsigned long	manufacturer:11;		/* RO */
@@ -2468,1642 +2894,1743 @@ union uvh_node_id_u {
 		unsigned long	ni_port:5;			/* RO */
 		unsigned long	rsvd_62_63:2;
 	} s3;
-	struct uv4h_node_id_s {
+
+	/* UV2 unique struct */
+	struct uv2h_node_id_s {
 		unsigned long	force1:1;			/* RO */
 		unsigned long	manufacturer:11;		/* RO */
 		unsigned long	part_number:16;			/* RO */
 		unsigned long	revision:4;			/* RO */
 		unsigned long	node_id:15;			/* RW */
-		unsigned long	rsvd_47:1;
-		unsigned long	router_select:1;		/* RO */
-		unsigned long	rsvd_49:1;
+		unsigned long	rsvd_47_49:3;
 		unsigned long	nodes_per_bit:7;		/* RO */
 		unsigned long	ni_port:5;			/* RO */
 		unsigned long	rsvd_62_63:2;
-	} s4;
+	} s2;
+};
+
+/* ========================================================================= */
+/*                            UVH_NODE_PRESENT_0                             */
+/* ========================================================================= */
+#define UVH_NODE_PRESENT_0 (						\
+	is_uv(UV5) ? 0x1400UL :						\
+	0)
+
+
+/* UVYH common defines */
+#define UVYH_NODE_PRESENT_0_NODES_SHFT			0
+#define UVYH_NODE_PRESENT_0_NODES_MASK			0xffffffffffffffffUL
+
+
+union uvh_node_present_0_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_node_present_0_s {
+		unsigned long	nodes:64;			/* RW */
+	} s;
+
+	/* UVYH common struct */
+	struct uvyh_node_present_0_s {
+		unsigned long	nodes:64;			/* RW */
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_node_present_0_s {
+		unsigned long	nodes:64;			/* RW */
+	} s5;
+};
+
+/* ========================================================================= */
+/*                            UVH_NODE_PRESENT_1                             */
+/* ========================================================================= */
+#define UVH_NODE_PRESENT_1 (						\
+	is_uv(UV5) ? 0x1408UL :						\
+	0)
+
+
+/* UVYH common defines */
+#define UVYH_NODE_PRESENT_1_NODES_SHFT			0
+#define UVYH_NODE_PRESENT_1_NODES_MASK			0xffffffffffffffffUL
+
+
+union uvh_node_present_1_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_node_present_1_s {
+		unsigned long	nodes:64;			/* RW */
+	} s;
+
+	/* UVYH common struct */
+	struct uvyh_node_present_1_s {
+		unsigned long	nodes:64;			/* RW */
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_node_present_1_s {
+		unsigned long	nodes:64;			/* RW */
+	} s5;
 };
 
 /* ========================================================================= */
 /*                          UVH_NODE_PRESENT_TABLE                           */
 /* ========================================================================= */
-#define UVH_NODE_PRESENT_TABLE 0x1400UL
+#define UVH_NODE_PRESENT_TABLE (					\
+	is_uv(UV4) ? 0x1400UL :						\
+	is_uv(UV3) ? 0x1400UL :						\
+	is_uv(UV2) ? 0x1400UL :						\
+	0)
 
-#define UV2H_NODE_PRESENT_TABLE_DEPTH 16
-#define UV3H_NODE_PRESENT_TABLE_DEPTH 16
-#define UV4H_NODE_PRESENT_TABLE_DEPTH 4
 #define UVH_NODE_PRESENT_TABLE_DEPTH (					\
-	is_uv2_hub() ? UV2H_NODE_PRESENT_TABLE_DEPTH :			\
-	is_uv3_hub() ? UV3H_NODE_PRESENT_TABLE_DEPTH :			\
-	/*is_uv4_hub*/ UV4H_NODE_PRESENT_TABLE_DEPTH)
+	is_uv(UV4) ? 4 :						\
+	is_uv(UV3) ? 16 :						\
+	is_uv(UV2) ? 16 :						\
+	0)
+
 
-#define UVH_NODE_PRESENT_TABLE_NODES_SHFT		0
-#define UVH_NODE_PRESENT_TABLE_NODES_MASK		0xffffffffffffffffUL
+/* UVXH common defines */
+#define UVXH_NODE_PRESENT_TABLE_NODES_SHFT		0
+#define UVXH_NODE_PRESENT_TABLE_NODES_MASK		0xffffffffffffffffUL
 
 
 union uvh_node_present_table_u {
 	unsigned long	v;
+
+	/* UVH common struct */
 	struct uvh_node_present_table_s {
 		unsigned long	nodes:64;			/* RW */
 	} s;
+
+	/* UVXH common struct */
+	struct uvxh_node_present_table_s {
+		unsigned long	nodes:64;			/* RW */
+	} sx;
+
+	/* UV4 unique struct */
+	struct uv4h_node_present_table_s {
+		unsigned long	nodes:64;			/* RW */
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_node_present_table_s {
+		unsigned long	nodes:64;			/* RW */
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_node_present_table_s {
+		unsigned long	nodes:64;			/* RW */
+	} s2;
 };
 
 /* ========================================================================= */
-/*                 UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR                  */
+/*                       UVH_RH10_GAM_ADDR_MAP_CONFIG                        */
 /* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x4800c8UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR (			\
-	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR :	\
-	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR :	\
-	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR)
-
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_alias210_overlay_config_0_mmr_u {
+#define UVH_RH10_GAM_ADDR_MAP_CONFIG (					\
+	is_uv(UV5) ? 0x470000UL :					\
+	0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT	6
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_N_SKT_MASK	0x00000000000001c0UL
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_SHFT	12
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_LS_ENABLE_MASK	0x0000000000001000UL
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_SHFT 16
+#define UVYH_RH10_GAM_ADDR_MAP_CONFIG_MK_TME_KEYID_BITS_MASK 0x00000000000f0000UL
+
+
+union uvh_rh10_gam_addr_map_config_u {
 	unsigned long	v;
-	struct uvh_rh_gam_alias210_overlay_config_0_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
+
+	/* UVH common struct */
+	struct uvh_rh10_gam_addr_map_config_s {
+		unsigned long	undef_0_5:6;			/* Undefined */
+		unsigned long	n_skt:3;			/* RW */
+		unsigned long	undef_9_11:3;			/* Undefined */
+		unsigned long	ls_enable:1;			/* RW */
+		unsigned long	undef_13_15:3;			/* Undefined */
+		unsigned long	mk_tme_keyid_bits:4;		/* RW */
+		unsigned long	rsvd_20_63:44;
 	} s;
-	struct uvxh_rh_gam_alias210_overlay_config_0_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
-	} sx;
-	struct uv2h_rh_gam_alias210_overlay_config_0_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
-	} s2;
-	struct uv3h_rh_gam_alias210_overlay_config_0_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
-	} s3;
-	struct uv4h_rh_gam_alias210_overlay_config_0_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
-	} s4;
+
+	/* UVYH common struct */
+	struct uvyh_rh10_gam_addr_map_config_s {
+		unsigned long	undef_0_5:6;			/* Undefined */
+		unsigned long	n_skt:3;			/* RW */
+		unsigned long	undef_9_11:3;			/* Undefined */
+		unsigned long	ls_enable:1;			/* RW */
+		unsigned long	undef_13_15:3;			/* Undefined */
+		unsigned long	mk_tme_keyid_bits:4;		/* RW */
+		unsigned long	rsvd_20_63:44;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_rh10_gam_addr_map_config_s {
+		unsigned long	undef_0_5:6;			/* Undefined */
+		unsigned long	n_skt:3;			/* RW */
+		unsigned long	undef_9_11:3;			/* Undefined */
+		unsigned long	ls_enable:1;			/* RW */
+		unsigned long	undef_13_15:3;			/* Undefined */
+		unsigned long	mk_tme_keyid_bits:4;		/* RW */
+	} s5;
 };
 
 /* ========================================================================= */
-/*                 UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR                  */
+/*                     UVH_RH10_GAM_GRU_OVERLAY_CONFIG                       */
 /* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x4800d8UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR (			\
-	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR :	\
-	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR :	\
-	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR)
-
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_alias210_overlay_config_1_mmr_u {
+#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG (				\
+	is_uv(UV5) ? 0x4700b0UL :					\
+	0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT	25
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK	0x000ffffffe000000UL
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT	52
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK	0x0070000000000000UL
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT	63
+#define UVYH_RH10_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK	0x8000000000000000UL
+
+#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK (			\
+	is_uv(UV5) ? 0x000ffffffe000000UL :				\
+	0)
+#define UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT (			\
+	is_uv(UV5) ? 25 :						\
+	-1)
+
+union uvh_rh10_gam_gru_overlay_config_u {
 	unsigned long	v;
-	struct uvh_rh_gam_alias210_overlay_config_1_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
-	} s;
-	struct uvxh_rh_gam_alias210_overlay_config_1_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
-	} sx;
-	struct uv2h_rh_gam_alias210_overlay_config_1_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
-	} s2;
-	struct uv3h_rh_gam_alias210_overlay_config_1_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
-	} s3;
-	struct uv4h_rh_gam_alias210_overlay_config_1_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
+
+	/* UVH common struct */
+	struct uvh_rh10_gam_gru_overlay_config_s {
+		unsigned long	undef_0_24:25;			/* Undefined */
+		unsigned long	base:27;			/* RW */
+		unsigned long	n_gru:3;			/* RW */
+		unsigned long	undef_55_62:8;			/* Undefined */
 		unsigned long	enable:1;			/* RW */
-	} s4;
+	} s;
+
+	/* UVYH common struct */
+	struct uvyh_rh10_gam_gru_overlay_config_s {
+		unsigned long	undef_0_24:25;			/* Undefined */
+		unsigned long	base:27;			/* RW */
+		unsigned long	n_gru:3;			/* RW */
+		unsigned long	undef_55_62:8;			/* Undefined */
+		unsigned long	enable:1;			/* RW */
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_rh10_gam_gru_overlay_config_s {
+		unsigned long	undef_0_24:25;			/* Undefined */
+		unsigned long	base:27;			/* RW */
+		unsigned long	n_gru:3;			/* RW */
+		unsigned long	undef_55_62:8;			/* Undefined */
+		unsigned long	enable:1;			/* RW */
+	} s5;
 };
 
 /* ========================================================================= */
-/*                 UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR                  */
+/*                    UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0                     */
 /* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x4800e8UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR (			\
-	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR :	\
-	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR :	\
-	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR)
-
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV4H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_alias210_overlay_config_2_mmr_u {
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0 (				\
+	is_uv(UV5) ? 0x473000UL :					\
+	0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT	26
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK	0x000ffffffc000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT	52
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK	0x03f0000000000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT	63
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK	0x8000000000000000UL
+
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK (			\
+	is_uv(UV5) ? 0x000ffffffc000000UL :				\
+	0)
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT (			\
+	is_uv(UV5) ? 26 :						\
+	-1)
+
+union uvh_rh10_gam_mmioh_overlay_config0_u {
 	unsigned long	v;
-	struct uvh_rh_gam_alias210_overlay_config_2_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
+
+	/* UVH common struct */
+	struct uvh_rh10_gam_mmioh_overlay_config0_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:26;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	undef_62:1;			/* Undefined */
 		unsigned long	enable:1;			/* RW */
 	} s;
-	struct uvxh_rh_gam_alias210_overlay_config_2_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
-	} sx;
-	struct uv2h_rh_gam_alias210_overlay_config_2_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
-		unsigned long	enable:1;			/* RW */
-	} s2;
-	struct uv3h_rh_gam_alias210_overlay_config_2_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
+
+	/* UVYH common struct */
+	struct uvyh_rh10_gam_mmioh_overlay_config0_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:26;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	undef_62:1;			/* Undefined */
 		unsigned long	enable:1;			/* RW */
-	} s3;
-	struct uv4h_rh_gam_alias210_overlay_config_2_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	base:8;				/* RW */
-		unsigned long	rsvd_32_47:16;
-		unsigned long	m_alias:5;			/* RW */
-		unsigned long	rsvd_53_62:10;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_rh10_gam_mmioh_overlay_config0_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:26;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	undef_62:1;			/* Undefined */
 		unsigned long	enable:1;			/* RW */
-	} s4;
+	} s5;
 };
 
 /* ========================================================================= */
-/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR                  */
+/*                    UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1                     */
 /* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x4800d0UL
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR (			\
-	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR :	\
-	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR :	\
-	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR)
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1 (				\
+	is_uv(UV5) ? 0x474000UL :					\
+	0)
+
+
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT	26
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK	0x000ffffffc000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT	52
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK	0x03f0000000000000UL
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT	63
+#define UVYH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK	0x8000000000000000UL
+
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK (			\
+	is_uv(UV5) ? 0x000ffffffc000000UL :				\
+	0)
+#define UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT (			\
+	is_uv(UV5) ? 26 :						\
+	-1)
+
+union uvh_rh10_gam_mmioh_overlay_config1_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_rh10_gam_mmioh_overlay_config1_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:26;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	undef_62:1;			/* Undefined */
+		unsigned long	enable:1;			/* RW */
+	} s;
+
+	/* UVYH common struct */
+	struct uvyh_rh10_gam_mmioh_overlay_config1_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:26;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	undef_62:1;			/* Undefined */
+		unsigned long	enable:1;			/* RW */
+	} sy;
 
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+	/* UV5 unique struct */
+	struct uv5h_rh10_gam_mmioh_overlay_config1_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:26;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	undef_62:1;			/* Undefined */
+		unsigned long	enable:1;			/* RW */
+	} s5;
+};
 
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+/* ========================================================================= */
+/*                   UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0                     */
+/* ========================================================================= */
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0 (				\
+	is_uv(UV5) ? 0x473800UL :					\
+	0)
 
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH (			\
+	is_uv(UV5) ? 128 :						\
+	0)
 
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
 
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT	0
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK	0x000000000000007fUL
 
 
-union uvh_rh_gam_alias210_redirect_config_0_mmr_u {
+union uvh_rh10_gam_mmioh_redirect_config0_u {
 	unsigned long	v;
-	struct uvh_rh_gam_alias210_redirect_config_0_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
+
+	/* UVH common struct */
+	struct uvh_rh10_gam_mmioh_redirect_config0_s {
+		unsigned long	nasid:7;			/* RW */
+		unsigned long	rsvd_7_63:57;
 	} s;
-	struct uvxh_rh_gam_alias210_redirect_config_0_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} sx;
-	struct uv2h_rh_gam_alias210_redirect_config_0_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} s2;
-	struct uv3h_rh_gam_alias210_redirect_config_0_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} s3;
-	struct uv4h_rh_gam_alias210_redirect_config_0_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} s4;
+
+	/* UVYH common struct */
+	struct uvyh_rh10_gam_mmioh_redirect_config0_s {
+		unsigned long	nasid:7;			/* RW */
+		unsigned long	rsvd_7_63:57;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_rh10_gam_mmioh_redirect_config0_s {
+		unsigned long	nasid:7;			/* RW */
+		unsigned long	rsvd_7_63:57;
+	} s5;
 };
 
 /* ========================================================================= */
-/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR                  */
+/*                   UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1                     */
 /* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x4800e0UL
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR (			\
-	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR :	\
-	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR :	\
-	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR)
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1 (				\
+	is_uv(UV5) ? 0x474800UL :					\
+	0)
 
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH (			\
+	is_uv(UV5) ? 128 :						\
+	0)
 
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
 
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT	0
+#define UVYH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK	0x000000000000007fUL
 
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
 
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL
-
-
-union uvh_rh_gam_alias210_redirect_config_1_mmr_u {
+union uvh_rh10_gam_mmioh_redirect_config1_u {
 	unsigned long	v;
-	struct uvh_rh_gam_alias210_redirect_config_1_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
+
+	/* UVH common struct */
+	struct uvh_rh10_gam_mmioh_redirect_config1_s {
+		unsigned long	nasid:7;			/* RW */
+		unsigned long	rsvd_7_63:57;
 	} s;
-	struct uvxh_rh_gam_alias210_redirect_config_1_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} sx;
-	struct uv2h_rh_gam_alias210_redirect_config_1_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} s2;
-	struct uv3h_rh_gam_alias210_redirect_config_1_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} s3;
-	struct uv4h_rh_gam_alias210_redirect_config_1_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} s4;
+
+	/* UVYH common struct */
+	struct uvyh_rh10_gam_mmioh_redirect_config1_s {
+		unsigned long	nasid:7;			/* RW */
+		unsigned long	rsvd_7_63:57;
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_rh10_gam_mmioh_redirect_config1_s {
+		unsigned long	nasid:7;			/* RW */
+		unsigned long	rsvd_7_63:57;
+	} s5;
 };
 
 /* ========================================================================= */
-/*                UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR                  */
+/*                     UVH_RH10_GAM_MMR_OVERLAY_CONFIG                       */
 /* ========================================================================= */
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x4800f0UL
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR (			\
-	is_uv2_hub() ? UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR :	\
-	is_uv3_hub() ? UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR :	\
-	/*is_uv4_hub*/ UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR)
+#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG (				\
+	is_uv(UV5) ? 0x470090UL :					\
+	0)
 
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
 
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+/* UVYH common defines */
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT	25
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK	0x000ffffffe000000UL
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT	63
+#define UVYH_RH10_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK	0x8000000000000000UL
 
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_MASK (			\
+	is_uv(UV5) ? 0x000ffffffe000000UL :				\
+	0)
+#define UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT (			\
+	is_uv(UV5) ? 25 :						\
+	-1)
 
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+union uvh_rh10_gam_mmr_overlay_config_u {
+	unsigned long	v;
 
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24
-#define UV4H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL
+	/* UVH common struct */
+	struct uvh_rh10_gam_mmr_overlay_config_s {
+		unsigned long	undef_0_24:25;			/* Undefined */
+		unsigned long	base:27;			/* RW */
+		unsigned long	undef_52_62:11;			/* Undefined */
+		unsigned long	enable:1;			/* RW */
+	} s;
 
+	/* UVYH common struct */
+	struct uvyh_rh10_gam_mmr_overlay_config_s {
+		unsigned long	undef_0_24:25;			/* Undefined */
+		unsigned long	base:27;			/* RW */
+		unsigned long	undef_52_62:11;			/* Undefined */
+		unsigned long	enable:1;			/* RW */
+	} sy;
 
-union uvh_rh_gam_alias210_redirect_config_2_mmr_u {
-	unsigned long	v;
-	struct uvh_rh_gam_alias210_redirect_config_2_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} s;
-	struct uvxh_rh_gam_alias210_redirect_config_2_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} sx;
-	struct uv2h_rh_gam_alias210_redirect_config_2_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} s2;
-	struct uv3h_rh_gam_alias210_redirect_config_2_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} s3;
-	struct uv4h_rh_gam_alias210_redirect_config_2_mmr_s {
-		unsigned long	rsvd_0_23:24;
-		unsigned long	dest_base:22;			/* RW */
-		unsigned long	rsvd_46_63:18;
-	} s4;
+	/* UV5 unique struct */
+	struct uv5h_rh10_gam_mmr_overlay_config_s {
+		unsigned long	undef_0_24:25;			/* Undefined */
+		unsigned long	base:27;			/* RW */
+		unsigned long	undef_52_62:11;			/* Undefined */
+		unsigned long	enable:1;			/* RW */
+	} s5;
 };
 
 /* ========================================================================= */
-/*                          UVH_RH_GAM_CONFIG_MMR                            */
+/*                        UVH_RH_GAM_ADDR_MAP_CONFIG                         */
 /* ========================================================================= */
-#define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL
-#define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL
-#define UV4H_RH_GAM_CONFIG_MMR 0x480000UL
-#define UVH_RH_GAM_CONFIG_MMR (						\
-	is_uv2_hub() ? UV2H_RH_GAM_CONFIG_MMR :				\
-	is_uv3_hub() ? UV3H_RH_GAM_CONFIG_MMR :				\
-	/*is_uv4_hub*/ UV4H_RH_GAM_CONFIG_MMR)
-
-#define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT		6
-#define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK		0x00000000000003c0UL
+#define UVH_RH_GAM_ADDR_MAP_CONFIG (					\
+	is_uv(UV4) ? 0x480000UL :					\
+	is_uv(UV3) ? 0x1600000UL :					\
+	is_uv(UV2) ? 0x1600000UL :					\
+	0)
 
-#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT		6
-#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK		0x00000000000003c0UL
 
-#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT		0
-#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT		6
-#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK		0x000000000000003fUL
-#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK		0x00000000000003c0UL
+/* UVXH common defines */
+#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_SHFT		6
+#define UVXH_RH_GAM_ADDR_MAP_CONFIG_N_SKT_MASK		0x00000000000003c0UL
 
-#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_SHFT		0
-#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_SHFT		6
-#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK		0x000000000000003fUL
-#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK		0x00000000000003c0UL
+/* UV3 unique defines */
+#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT		0
+#define UV3H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK		0x000000000000003fUL
 
-#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_SHFT		6
-#define UV4H_RH_GAM_CONFIG_MMR_N_SKT_MASK		0x00000000000003c0UL
+/* UV2 unique defines */
+#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_SHFT		0
+#define UV2H_RH_GAM_ADDR_MAP_CONFIG_M_SKT_MASK		0x000000000000003fUL
 
 
-union uvh_rh_gam_config_mmr_u {
+union uvh_rh_gam_addr_map_config_u {
 	unsigned long	v;
-	struct uvh_rh_gam_config_mmr_s {
+
+	/* UVH common struct */
+	struct uvh_rh_gam_addr_map_config_s {
 		unsigned long	rsvd_0_5:6;
 		unsigned long	n_skt:4;			/* RW */
 		unsigned long	rsvd_10_63:54;
 	} s;
-	struct uvxh_rh_gam_config_mmr_s {
+
+	/* UVXH common struct */
+	struct uvxh_rh_gam_addr_map_config_s {
 		unsigned long	rsvd_0_5:6;
 		unsigned long	n_skt:4;			/* RW */
 		unsigned long	rsvd_10_63:54;
 	} sx;
-	struct uv2h_rh_gam_config_mmr_s {
-		unsigned long	m_skt:6;			/* RW */
+
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_addr_map_config_s {
+		unsigned long	rsvd_0_5:6;
 		unsigned long	n_skt:4;			/* RW */
 		unsigned long	rsvd_10_63:54;
-	} s2;
-	struct uv3h_rh_gam_config_mmr_s {
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_addr_map_config_s {
 		unsigned long	m_skt:6;			/* RW */
 		unsigned long	n_skt:4;			/* RW */
 		unsigned long	rsvd_10_63:54;
 	} s3;
-	struct uv4h_rh_gam_config_mmr_s {
-		unsigned long	rsvd_0_5:6;
+
+	/* UV2 unique struct */
+	struct uv2h_rh_gam_addr_map_config_s {
+		unsigned long	m_skt:6;			/* RW */
 		unsigned long	n_skt:4;			/* RW */
 		unsigned long	rsvd_10_63:54;
-	} s4;
+	} s2;
 };
 
 /* ========================================================================= */
-/*                    UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR                      */
+/*                    UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG                      */
 /* ========================================================================= */
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x480010UL
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR (				\
-	is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR :		\
-	is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR :		\
-	/*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR)
-
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT	52
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK	0x00f0000000000000UL
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
-
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT	52
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK	0x00f0000000000000UL
-#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
-
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT	28
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT	52
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffff0000000UL
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK	0x00f0000000000000UL
-#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
-
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT	28
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT	52
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_SHFT	62
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffff0000000UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK	0x00f0000000000000UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK	0x4000000000000000UL
-#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
-
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT	26
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT	52
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffffc000000UL
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK	0x00f0000000000000UL
-#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
-
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK (			\
-	is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK :	\
-	is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK :	\
-	/*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK)
-#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT (			\
-	is_uv2_hub() ? UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT :	\
-	is_uv3_hub() ? UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT :	\
-	/*is_uv4_hub*/ UV4H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT)
-
-union uvh_rh_gam_gru_overlay_config_mmr_u {
+#define UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG (				\
+	is_uv(UV4) ? 0x4800c8UL :					\
+	is_uv(UV3) ? 0x16000c8UL :					\
+	is_uv(UV2) ? 0x16000c8UL :					\
+	0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_SHFT	24
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_BASE_MASK	0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_SHFT	48
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_M_ALIAS_MASK	0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_SHFT	63
+#define UVXH_RH_GAM_ALIAS_0_OVERLAY_CONFIG_ENABLE_MASK	0x8000000000000000UL
+
+
+union uvh_rh_gam_alias_0_overlay_config_u {
 	unsigned long	v;
-	struct uvh_rh_gam_gru_overlay_config_mmr_s {
-		unsigned long	rsvd_0_51:52;
-		unsigned long	n_gru:4;			/* RW */
-		unsigned long	rsvd_56_62:7;
+
+	/* UVH common struct */
+	struct uvh_rh_gam_alias_0_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
 		unsigned long	enable:1;			/* RW */
 	} s;
-	struct uvxh_rh_gam_gru_overlay_config_mmr_s {
-		unsigned long	rsvd_0_45:46;
-		unsigned long	rsvd_46_51:6;
-		unsigned long	n_gru:4;			/* RW */
-		unsigned long	rsvd_56_62:7;
-		unsigned long	enable:1;			/* RW */
-	} sx;
-	struct uv2h_rh_gam_gru_overlay_config_mmr_s {
-		unsigned long	rsvd_0_27:28;
-		unsigned long	base:18;			/* RW */
-		unsigned long	rsvd_46_51:6;
-		unsigned long	n_gru:4;			/* RW */
-		unsigned long	rsvd_56_62:7;
-		unsigned long	enable:1;			/* RW */
-	} s2;
-	struct uv3h_rh_gam_gru_overlay_config_mmr_s {
-		unsigned long	rsvd_0_27:28;
-		unsigned long	base:18;			/* RW */
-		unsigned long	rsvd_46_51:6;
-		unsigned long	n_gru:4;			/* RW */
-		unsigned long	rsvd_56_61:6;
-		unsigned long	mode:1;				/* RW */
-		unsigned long	enable:1;			/* RW */
-	} s3;
-	struct uv4h_rh_gam_gru_overlay_config_mmr_s {
-		unsigned long	rsvd_0_24:25;
-		unsigned long	undef_25:1;			/* Undefined */
-		unsigned long	base:20;			/* RW */
-		unsigned long	rsvd_46_51:6;
-		unsigned long	n_gru:4;			/* RW */
-		unsigned long	rsvd_56_62:7;
-		unsigned long	enable:1;			/* RW */
-	} s4;
-};
 
-/* ========================================================================= */
-/*                   UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR                    */
-/* ========================================================================= */
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR")
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x483000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR (				\
-	is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR :		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR :		\
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR)
-
-
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT	26
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT	46
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK	0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK	0x000fc00000000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT	26
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT	46
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK	0x00003ffffc000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK	0x000fc00000000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 52
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x000ffffffc000000UL
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x03f0000000000000UL
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT (		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \
-	is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT : \
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK (		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \
-	is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK : \
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK (		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \
-	is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK : \
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK (		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \
-	is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK : \
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK)
-
-union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
-	unsigned long	v;
-	struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:20;			/* RW */
-		unsigned long	m_io:6;				/* RW */
-		unsigned long	n_io:4;
-		unsigned long	rsvd_56_62:7;
-		unsigned long	enable:1;			/* RW */
-	} s3;
-	struct uv4h_rh_gam_mmioh_overlay_config0_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:20;			/* RW */
-		unsigned long	m_io:6;				/* RW */
-		unsigned long	n_io:4;
-		unsigned long	rsvd_56_62:7;
-		unsigned long	enable:1;			/* RW */
-	} s4;
-	struct uv4ah_rh_gam_mmioh_overlay_config0_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:26;			/* RW */
-		unsigned long	m_io:6;				/* RW */
-		unsigned long	n_io:4;
-		unsigned long	undef_62:1;			/* Undefined */
+	/* UVXH common struct */
+	struct uvxh_rh_gam_alias_0_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
 		unsigned long	enable:1;			/* RW */
-	} s4a;
-};
+	} sx;
 
-/* ========================================================================= */
-/*                   UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR                    */
-/* ========================================================================= */
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR (				\
-	is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR :		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR :		\
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR)
-
-
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT	26
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT	46
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK	0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK	0x000fc00000000000UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT	26
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT	46
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT 63
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK	0x00003ffffc000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK	0x000fc00000000000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT 52
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK 0x000ffffffc000000UL
-#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK 0x03f0000000000000UL
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT (		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \
-	is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT : \
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK (		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \
-	is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK : \
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK)
-
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK (		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \
-	is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK : \
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK)
-
-union uvh_rh_gam_mmioh_overlay_config1_mmr_u {
-	unsigned long	v;
-	struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:20;			/* RW */
-		unsigned long	m_io:6;				/* RW */
-		unsigned long	n_io:4;
-		unsigned long	rsvd_56_62:7;
-		unsigned long	enable:1;			/* RW */
-	} s3;
-	struct uv4h_rh_gam_mmioh_overlay_config1_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:20;			/* RW */
-		unsigned long	m_io:6;				/* RW */
-		unsigned long	n_io:4;
-		unsigned long	rsvd_56_62:7;
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_alias_0_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
 		unsigned long	enable:1;			/* RW */
 	} s4;
-	struct uv4ah_rh_gam_mmioh_overlay_config1_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:26;			/* RW */
-		unsigned long	m_io:6;				/* RW */
-		unsigned long	n_io:4;
-		unsigned long	undef_62:1;			/* Undefined */
+
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_alias_0_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
 		unsigned long	enable:1;			/* RW */
-	} s4a;
-};
+	} s3;
 
-/* ========================================================================= */
-/*                   UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR                     */
-/* ========================================================================= */
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL
-#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR")
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR uv_undefined("UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR")
-#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR (				\
-	is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR :		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR :		\
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR)
-
-
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT	27
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT	46
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT	52
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffff8000000UL
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK	0x000fc00000000000UL
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK	0x00f0000000000000UL
-#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL
-
-
-union uvh_rh_gam_mmioh_overlay_config_mmr_u {
-	unsigned long	v;
-	struct uv2h_rh_gam_mmioh_overlay_config_mmr_s {
-		unsigned long	rsvd_0_26:27;
-		unsigned long	base:19;			/* RW */
-		unsigned long	m_io:6;				/* RW */
-		unsigned long	n_io:4;				/* RW */
-		unsigned long	rsvd_56_62:7;
+	/* UV2 unique struct */
+	struct uv2h_rh_gam_alias_0_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
 		unsigned long	enable:1;			/* RW */
 	} s2;
 };
 
 /* ========================================================================= */
-/*                  UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR                    */
+/*                    UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG                     */
 /* ========================================================================= */
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x1603800UL
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR 0x483800UL
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR (				\
-	is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR :		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR :		\
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR)
+#define UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG (				\
+	is_uv(UV4) ? 0x4800d0UL :					\
+	is_uv(UV3) ? 0x16000d0UL :					\
+	is_uv(UV2) ? 0x16000d0UL :					\
+	0)
 
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH 128
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH (			\
-	is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH :	\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH :	\
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH)
 
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL
 
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
 
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT 0
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000007fffUL
+union uvh_rh_gam_alias_0_redirect_config_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_rh_gam_alias_0_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} s;
 
-#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK 0x0000000000000fffUL
+	/* UVXH common struct */
+	struct uvxh_rh_gam_alias_0_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} sx;
 
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK (		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \
-	is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK : \
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK)
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_alias_0_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} s4;
 
-union uvh_rh_gam_mmioh_redirect_config0_mmr_u {
-	unsigned long	v;
-	struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s {
-		unsigned long	nasid:15;			/* RW */
-		unsigned long	rsvd_15_63:49;
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_alias_0_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
 	} s3;
-	struct uv4h_rh_gam_mmioh_redirect_config0_mmr_s {
-		unsigned long	nasid:15;			/* RW */
-		unsigned long	rsvd_15_63:49;
-	} s4;
-	struct uv4ah_rh_gam_mmioh_redirect_config0_mmr_s {
-		unsigned long	nasid:12;			/* RW */
-		unsigned long	rsvd_12_63:52;
-	} s4a;
+
+	/* UV2 unique struct */
+	struct uv2h_rh_gam_alias_0_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} s2;
 };
 
 /* ========================================================================= */
-/*                  UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR                    */
+/*                    UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG                      */
 /* ========================================================================= */
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x1604800UL
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR 0x484800UL
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR (				\
-	is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR :		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR :		\
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR)
-
-#define UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH uv_undefined("UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH")
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH 128
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH (			\
-	is_uv2_hub() ? UV2H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH :	\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH :	\
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH)
+#define UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG (				\
+	is_uv(UV4) ? 0x4800d8UL :					\
+	is_uv(UV3) ? 0x16000d8UL :					\
+	is_uv(UV2) ? 0x16000d8UL :					\
+	0)
 
 
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
-#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_SHFT	24
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_BASE_MASK	0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_SHFT	48
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_M_ALIAS_MASK	0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_SHFT	63
+#define UVXH_RH_GAM_ALIAS_1_OVERLAY_CONFIG_ENABLE_MASK	0x8000000000000000UL
 
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT 0
-#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000007fffUL
 
-#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK 0x0000000000000fffUL
-
-#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK (		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \
-	is_uv4a_hub() ? UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK : \
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK)
-
-union uvh_rh_gam_mmioh_redirect_config1_mmr_u {
+union uvh_rh_gam_alias_1_overlay_config_u {
 	unsigned long	v;
-	struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s {
-		unsigned long	nasid:15;			/* RW */
-		unsigned long	rsvd_15_63:49;
-	} s3;
-	struct uv4h_rh_gam_mmioh_redirect_config1_mmr_s {
-		unsigned long	nasid:15;			/* RW */
-		unsigned long	rsvd_15_63:49;
-	} s4;
-	struct uv4ah_rh_gam_mmioh_redirect_config1_mmr_s {
-		unsigned long	nasid:12;			/* RW */
-		unsigned long	rsvd_12_63:52;
-	} s4a;
-};
 
-/* ========================================================================= */
-/*                    UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR                      */
-/* ========================================================================= */
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x480028UL
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR (				\
-	is_uv2_hub() ? UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR :		\
-	is_uv3_hub() ? UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR :		\
-	/*is_uv4_hub*/ UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR)
-
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT	26
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffffc000000UL
-#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
-
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT	26
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffffc000000UL
-#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
-
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT	26
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffffc000000UL
-#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
-
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT	26
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffffc000000UL
-#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
-
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT	26
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT	63
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK	0x00003ffffc000000UL
-#define UV4H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK	0x8000000000000000UL
-
-
-union uvh_rh_gam_mmr_overlay_config_mmr_u {
-	unsigned long	v;
-	struct uvh_rh_gam_mmr_overlay_config_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:20;			/* RW */
-		unsigned long	rsvd_46_62:17;
+	/* UVH common struct */
+	struct uvh_rh_gam_alias_1_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
 		unsigned long	enable:1;			/* RW */
 	} s;
-	struct uvxh_rh_gam_mmr_overlay_config_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:20;			/* RW */
-		unsigned long	rsvd_46_62:17;
+
+	/* UVXH common struct */
+	struct uvxh_rh_gam_alias_1_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
 		unsigned long	enable:1;			/* RW */
 	} sx;
-	struct uv2h_rh_gam_mmr_overlay_config_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:20;			/* RW */
-		unsigned long	rsvd_46_62:17;
+
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_alias_1_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
 		unsigned long	enable:1;			/* RW */
-	} s2;
-	struct uv3h_rh_gam_mmr_overlay_config_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:20;			/* RW */
-		unsigned long	rsvd_46_62:17;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_alias_1_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
 		unsigned long	enable:1;			/* RW */
 	} s3;
-	struct uv4h_rh_gam_mmr_overlay_config_mmr_s {
-		unsigned long	rsvd_0_25:26;
-		unsigned long	base:20;			/* RW */
-		unsigned long	rsvd_46_62:17;
+
+	/* UV2 unique struct */
+	struct uv2h_rh_gam_alias_1_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
 		unsigned long	enable:1;			/* RW */
-	} s4;
+	} s2;
 };
 
 /* ========================================================================= */
-/*                                 UVH_RTC                                   */
+/*                    UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG                     */
 /* ========================================================================= */
-#define UV2H_RTC 0x340000UL
-#define UV3H_RTC 0x340000UL
-#define UV4H_RTC 0xe0000UL
-#define UVH_RTC (							\
-	is_uv2_hub() ? UV2H_RTC :					\
-	is_uv3_hub() ? UV3H_RTC :					\
-	/*is_uv4_hub*/ UV4H_RTC)
+#define UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG (				\
+	is_uv(UV4) ? 0x4800e0UL :					\
+	is_uv(UV3) ? 0x16000e0UL :					\
+	is_uv(UV2) ? 0x16000e0UL :					\
+	0)
 
-#define UVH_RTC_REAL_TIME_CLOCK_SHFT			0
-#define UVH_RTC_REAL_TIME_CLOCK_MASK			0x00ffffffffffffffUL
 
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_1_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL
 
-union uvh_rtc_u {
+
+union uvh_rh_gam_alias_1_redirect_config_u {
 	unsigned long	v;
-	struct uvh_rtc_s {
-		unsigned long	real_time_clock:56;		/* RW */
-		unsigned long	rsvd_56_63:8;
+
+	/* UVH common struct */
+	struct uvh_rh_gam_alias_1_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
 	} s;
+
+	/* UVXH common struct */
+	struct uvxh_rh_gam_alias_1_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} sx;
+
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_alias_1_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_alias_1_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_rh_gam_alias_1_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} s2;
 };
 
 /* ========================================================================= */
-/*                           UVH_RTC1_INT_CONFIG                             */
+/*                    UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG                      */
 /* ========================================================================= */
-#define UVH_RTC1_INT_CONFIG 0x615c0UL
+#define UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG (				\
+	is_uv(UV4) ? 0x4800e8UL :					\
+	is_uv(UV3) ? 0x16000e8UL :					\
+	is_uv(UV2) ? 0x16000e8UL :					\
+	0)
 
-#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT			0
-#define UVH_RTC1_INT_CONFIG_DM_SHFT			8
-#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT		11
-#define UVH_RTC1_INT_CONFIG_STATUS_SHFT			12
-#define UVH_RTC1_INT_CONFIG_P_SHFT			13
-#define UVH_RTC1_INT_CONFIG_T_SHFT			15
-#define UVH_RTC1_INT_CONFIG_M_SHFT			16
-#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT		32
-#define UVH_RTC1_INT_CONFIG_VECTOR_MASK			0x00000000000000ffUL
-#define UVH_RTC1_INT_CONFIG_DM_MASK			0x0000000000000700UL
-#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK		0x0000000000000800UL
-#define UVH_RTC1_INT_CONFIG_STATUS_MASK			0x0000000000001000UL
-#define UVH_RTC1_INT_CONFIG_P_MASK			0x0000000000002000UL
-#define UVH_RTC1_INT_CONFIG_T_MASK			0x0000000000008000UL
-#define UVH_RTC1_INT_CONFIG_M_MASK			0x0000000000010000UL
-#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
 
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_SHFT	24
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_BASE_MASK	0x00000000ff000000UL
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_SHFT	48
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_M_ALIAS_MASK	0x001f000000000000UL
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_SHFT	63
+#define UVXH_RH_GAM_ALIAS_2_OVERLAY_CONFIG_ENABLE_MASK	0x8000000000000000UL
 
-union uvh_rtc1_int_config_u {
+
+union uvh_rh_gam_alias_2_overlay_config_u {
 	unsigned long	v;
-	struct uvh_rtc1_int_config_s {
-		unsigned long	vector_:8;			/* RW */
-		unsigned long	dm:3;				/* RW */
-		unsigned long	destmode:1;			/* RW */
-		unsigned long	status:1;			/* RO */
-		unsigned long	p:1;				/* RO */
-		unsigned long	rsvd_14:1;
-		unsigned long	t:1;				/* RO */
-		unsigned long	m:1;				/* RW */
-		unsigned long	rsvd_17_31:15;
-		unsigned long	apic_id:32;			/* RW */
+
+	/* UVH common struct */
+	struct uvh_rh_gam_alias_2_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
+		unsigned long	enable:1;			/* RW */
 	} s;
+
+	/* UVXH common struct */
+	struct uvxh_rh_gam_alias_2_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
+		unsigned long	enable:1;			/* RW */
+	} sx;
+
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_alias_2_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
+		unsigned long	enable:1;			/* RW */
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_alias_2_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
+		unsigned long	enable:1;			/* RW */
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_rh_gam_alias_2_overlay_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	base:8;				/* RW */
+		unsigned long	rsvd_32_47:16;
+		unsigned long	m_alias:5;			/* RW */
+		unsigned long	rsvd_53_62:10;
+		unsigned long	enable:1;			/* RW */
+	} s2;
 };
 
 /* ========================================================================= */
-/*                               UVH_SCRATCH5                                */
+/*                    UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG                     */
 /* ========================================================================= */
-#define UV2H_SCRATCH5 0x2d0200UL
-#define UV3H_SCRATCH5 0x2d0200UL
-#define UV4H_SCRATCH5 0xb0200UL
-#define UVH_SCRATCH5 (							\
-	is_uv2_hub() ? UV2H_SCRATCH5 :					\
-	is_uv3_hub() ? UV3H_SCRATCH5 :					\
-	/*is_uv4_hub*/ UV4H_SCRATCH5)
-
-#define UV2H_SCRATCH5_32 0x778
-#define UV3H_SCRATCH5_32 0x778
-#define UV4H_SCRATCH5_32 0x798
-#define UVH_SCRATCH5_32 (						\
-	is_uv2_hub() ? UV2H_SCRATCH5_32 :				\
-	is_uv3_hub() ? UV3H_SCRATCH5_32 :				\
-	/*is_uv4_hub*/ UV4H_SCRATCH5_32)
+#define UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG (				\
+	is_uv(UV4) ? 0x4800f0UL :					\
+	is_uv(UV3) ? 0x16000f0UL :					\
+	is_uv(UV2) ? 0x16000f0UL :					\
+	0)
 
-#define UVH_SCRATCH5_SCRATCH5_SHFT			0
-#define UVH_SCRATCH5_SCRATCH5_MASK			0xffffffffffffffffUL
 
+/* UVXH common defines */
+#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_SHFT 24
+#define UVXH_RH_GAM_ALIAS_2_REDIRECT_CONFIG_DEST_BASE_MASK 0x00003fffff000000UL
 
-union uvh_scratch5_u {
+
+union uvh_rh_gam_alias_2_redirect_config_u {
 	unsigned long	v;
-	struct uvh_scratch5_s {
-		unsigned long	scratch5:64;			/* RW, W1CS */
-	} s;
-};
 
-/* ========================================================================= */
-/*                            UVH_SCRATCH5_ALIAS                             */
-/* ========================================================================= */
-#define UV2H_SCRATCH5_ALIAS 0x2d0208UL
-#define UV3H_SCRATCH5_ALIAS 0x2d0208UL
-#define UV4H_SCRATCH5_ALIAS 0xb0208UL
-#define UVH_SCRATCH5_ALIAS (						\
-	is_uv2_hub() ? UV2H_SCRATCH5_ALIAS :				\
-	is_uv3_hub() ? UV3H_SCRATCH5_ALIAS :				\
-	/*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS)
+	/* UVH common struct */
+	struct uvh_rh_gam_alias_2_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} s;
 
-#define UV2H_SCRATCH5_ALIAS_32 0x780
-#define UV3H_SCRATCH5_ALIAS_32 0x780
-#define UV4H_SCRATCH5_ALIAS_32 0x7a0
-#define UVH_SCRATCH5_ALIAS_32 (						\
-	is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_32 :				\
-	is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_32 :				\
-	/*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_32)
+	/* UVXH common struct */
+	struct uvxh_rh_gam_alias_2_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} sx;
 
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_alias_2_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} s4;
 
-/* ========================================================================= */
-/*                           UVH_SCRATCH5_ALIAS_2                            */
-/* ========================================================================= */
-#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL
-#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL
-#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL
-#define UVH_SCRATCH5_ALIAS_2 (						\
-	is_uv2_hub() ? UV2H_SCRATCH5_ALIAS_2 :				\
-	is_uv3_hub() ? UV3H_SCRATCH5_ALIAS_2 :				\
-	/*is_uv4_hub*/ UV4H_SCRATCH5_ALIAS_2)
-#define UVH_SCRATCH5_ALIAS_2_32 0x788
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_alias_2_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} s3;
 
+	/* UV2 unique struct */
+	struct uv2h_rh_gam_alias_2_redirect_config_s {
+		unsigned long	rsvd_0_23:24;
+		unsigned long	dest_base:22;			/* RW */
+		unsigned long	rsvd_46_63:18;
+	} s2;
+};
 
 /* ========================================================================= */
-/*                          UVXH_EVENT_OCCURRED2                             */
+/*                      UVH_RH_GAM_GRU_OVERLAY_CONFIG                        */
 /* ========================================================================= */
-#define UVXH_EVENT_OCCURRED2 0x70100UL
-
-#define UV2H_EVENT_OCCURRED2_32 0xb68
-#define UV3H_EVENT_OCCURRED2_32 0xb68
-#define UV4H_EVENT_OCCURRED2_32 0x608
-#define UVH_EVENT_OCCURRED2_32 (					\
-	is_uv2_hub() ? UV2H_EVENT_OCCURRED2_32 :			\
-	is_uv3_hub() ? UV3H_EVENT_OCCURRED2_32 :			\
-	/*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_32)
-
-
-#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT			0
-#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT			1
-#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT			2
-#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT			3
-#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT			4
-#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT			5
-#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT			6
-#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT			7
-#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT			8
-#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT			9
-#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT		10
-#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT		11
-#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT		12
-#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT		13
-#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT		14
-#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT		15
-#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT		16
-#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT		17
-#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT		18
-#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT		19
-#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT		20
-#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT		21
-#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT		22
-#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT		23
-#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT		24
-#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT		25
-#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT		26
-#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT		27
-#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT		28
-#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT		29
-#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT		30
-#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT		31
-#define UV2H_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000000001UL
-#define UV2H_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000000002UL
-#define UV2H_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000000004UL
-#define UV2H_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000000008UL
-#define UV2H_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000000010UL
-#define UV2H_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000000020UL
-#define UV2H_EVENT_OCCURRED2_RTC_6_MASK			0x0000000000000040UL
-#define UV2H_EVENT_OCCURRED2_RTC_7_MASK			0x0000000000000080UL
-#define UV2H_EVENT_OCCURRED2_RTC_8_MASK			0x0000000000000100UL
-#define UV2H_EVENT_OCCURRED2_RTC_9_MASK			0x0000000000000200UL
-#define UV2H_EVENT_OCCURRED2_RTC_10_MASK		0x0000000000000400UL
-#define UV2H_EVENT_OCCURRED2_RTC_11_MASK		0x0000000000000800UL
-#define UV2H_EVENT_OCCURRED2_RTC_12_MASK		0x0000000000001000UL
-#define UV2H_EVENT_OCCURRED2_RTC_13_MASK		0x0000000000002000UL
-#define UV2H_EVENT_OCCURRED2_RTC_14_MASK		0x0000000000004000UL
-#define UV2H_EVENT_OCCURRED2_RTC_15_MASK		0x0000000000008000UL
-#define UV2H_EVENT_OCCURRED2_RTC_16_MASK		0x0000000000010000UL
-#define UV2H_EVENT_OCCURRED2_RTC_17_MASK		0x0000000000020000UL
-#define UV2H_EVENT_OCCURRED2_RTC_18_MASK		0x0000000000040000UL
-#define UV2H_EVENT_OCCURRED2_RTC_19_MASK		0x0000000000080000UL
-#define UV2H_EVENT_OCCURRED2_RTC_20_MASK		0x0000000000100000UL
-#define UV2H_EVENT_OCCURRED2_RTC_21_MASK		0x0000000000200000UL
-#define UV2H_EVENT_OCCURRED2_RTC_22_MASK		0x0000000000400000UL
-#define UV2H_EVENT_OCCURRED2_RTC_23_MASK		0x0000000000800000UL
-#define UV2H_EVENT_OCCURRED2_RTC_24_MASK		0x0000000001000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_25_MASK		0x0000000002000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_26_MASK		0x0000000004000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_27_MASK		0x0000000008000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_28_MASK		0x0000000010000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_29_MASK		0x0000000020000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_30_MASK		0x0000000040000000UL
-#define UV2H_EVENT_OCCURRED2_RTC_31_MASK		0x0000000080000000UL
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG (					\
+	is_uv(UV4) ? 0x480010UL :					\
+	is_uv(UV3) ? 0x1600010UL :					\
+	is_uv(UV2) ? 0x1600010UL :					\
+	0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_SHFT	52
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_N_GRU_MASK	0x00f0000000000000UL
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_SHFT	63
+#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_ENABLE_MASK	0x8000000000000000UL
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT	26
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK	0x000ffffffc000000UL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT	26
+#define UV4H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK	0x00003ffffc000000UL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT	28
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK	0x00003ffff0000000UL
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_SHFT	62
+#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MODE_MASK	0x4000000000000000UL
+
+/* UV2 unique defines */
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT	28
+#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK	0x00003ffff0000000UL
+
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK (			\
+	is_uv(UV4A) ? 0x000ffffffc000000UL :				\
+	is_uv(UV4) ? 0x00003ffffc000000UL :				\
+	is_uv(UV3) ? 0x00003ffff0000000UL :				\
+	is_uv(UV2) ? 0x00003ffff0000000UL :				\
+	0)
+#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT (			\
+	is_uv(UV4) ? 26 :						\
+	is_uv(UV3) ? 28 :						\
+	is_uv(UV2) ? 28 :						\
+	-1)
+
+union uvh_rh_gam_gru_overlay_config_u {
+	unsigned long	v;
 
-#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT			0
-#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT			1
-#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT			2
-#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT			3
-#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT			4
-#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT			5
-#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT			6
-#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT			7
-#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT			8
-#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT			9
-#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT		10
-#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT		11
-#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT		12
-#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT		13
-#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT		14
-#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT		15
-#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT		16
-#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT		17
-#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT		18
-#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT		19
-#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT		20
-#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT		21
-#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT		22
-#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT		23
-#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT		24
-#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT		25
-#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT		26
-#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT		27
-#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT		28
-#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT		29
-#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT		30
-#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT		31
-#define UV3H_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000000001UL
-#define UV3H_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000000002UL
-#define UV3H_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000000004UL
-#define UV3H_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000000008UL
-#define UV3H_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000000010UL
-#define UV3H_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000000020UL
-#define UV3H_EVENT_OCCURRED2_RTC_6_MASK			0x0000000000000040UL
-#define UV3H_EVENT_OCCURRED2_RTC_7_MASK			0x0000000000000080UL
-#define UV3H_EVENT_OCCURRED2_RTC_8_MASK			0x0000000000000100UL
-#define UV3H_EVENT_OCCURRED2_RTC_9_MASK			0x0000000000000200UL
-#define UV3H_EVENT_OCCURRED2_RTC_10_MASK		0x0000000000000400UL
-#define UV3H_EVENT_OCCURRED2_RTC_11_MASK		0x0000000000000800UL
-#define UV3H_EVENT_OCCURRED2_RTC_12_MASK		0x0000000000001000UL
-#define UV3H_EVENT_OCCURRED2_RTC_13_MASK		0x0000000000002000UL
-#define UV3H_EVENT_OCCURRED2_RTC_14_MASK		0x0000000000004000UL
-#define UV3H_EVENT_OCCURRED2_RTC_15_MASK		0x0000000000008000UL
-#define UV3H_EVENT_OCCURRED2_RTC_16_MASK		0x0000000000010000UL
-#define UV3H_EVENT_OCCURRED2_RTC_17_MASK		0x0000000000020000UL
-#define UV3H_EVENT_OCCURRED2_RTC_18_MASK		0x0000000000040000UL
-#define UV3H_EVENT_OCCURRED2_RTC_19_MASK		0x0000000000080000UL
-#define UV3H_EVENT_OCCURRED2_RTC_20_MASK		0x0000000000100000UL
-#define UV3H_EVENT_OCCURRED2_RTC_21_MASK		0x0000000000200000UL
-#define UV3H_EVENT_OCCURRED2_RTC_22_MASK		0x0000000000400000UL
-#define UV3H_EVENT_OCCURRED2_RTC_23_MASK		0x0000000000800000UL
-#define UV3H_EVENT_OCCURRED2_RTC_24_MASK		0x0000000001000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_25_MASK		0x0000000002000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_26_MASK		0x0000000004000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_27_MASK		0x0000000008000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_28_MASK		0x0000000010000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_29_MASK		0x0000000020000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_30_MASK		0x0000000040000000UL
-#define UV3H_EVENT_OCCURRED2_RTC_31_MASK		0x0000000080000000UL
+	/* UVH common struct */
+	struct uvh_rh_gam_gru_overlay_config_s {
+		unsigned long	rsvd_0_45:46;
+		unsigned long	rsvd_46_51:6;
+		unsigned long	n_gru:4;			/* RW */
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s;
+
+	/* UVXH common struct */
+	struct uvxh_rh_gam_gru_overlay_config_s {
+		unsigned long	rsvd_0_45:46;
+		unsigned long	rsvd_46_51:6;
+		unsigned long	n_gru:4;			/* RW */
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} sx;
+
+	/* UV4A unique struct */
+	struct uv4ah_rh_gam_gru_overlay_config_s {
+		unsigned long	rsvd_0_24:25;
+		unsigned long	undef_25:1;			/* Undefined */
+		unsigned long	base:26;			/* RW */
+		unsigned long	n_gru:4;			/* RW */
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s4a;
+
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_gru_overlay_config_s {
+		unsigned long	rsvd_0_24:25;
+		unsigned long	undef_25:1;			/* Undefined */
+		unsigned long	base:20;			/* RW */
+		unsigned long	rsvd_46_51:6;
+		unsigned long	n_gru:4;			/* RW */
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_gru_overlay_config_s {
+		unsigned long	rsvd_0_27:28;
+		unsigned long	base:18;			/* RW */
+		unsigned long	rsvd_46_51:6;
+		unsigned long	n_gru:4;			/* RW */
+		unsigned long	rsvd_56_61:6;
+		unsigned long	mode:1;				/* RW */
+		unsigned long	enable:1;			/* RW */
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_rh_gam_gru_overlay_config_s {
+		unsigned long	rsvd_0_27:28;
+		unsigned long	base:18;			/* RW */
+		unsigned long	rsvd_46_51:6;
+		unsigned long	n_gru:4;			/* RW */
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s2;
+};
+
+/* ========================================================================= */
+/*                     UVH_RH_GAM_MMIOH_OVERLAY_CONFIG                       */
+/* ========================================================================= */
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG (				\
+	is_uv(UV2) ? 0x1600030UL :					\
+	0)
 
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_SHFT 0
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_SHFT 1
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_SHFT 2
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_SHFT 3
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_SHFT 4
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_SHFT 5
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_SHFT 6
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_SHFT 7
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_SHFT 8
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_SHFT 9
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_SHFT 10
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_SHFT 11
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_SHFT 12
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_SHFT 13
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_SHFT 14
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_SHFT 15
-#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_SHFT	16
-#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_SHFT	17
-#define UV4H_EVENT_OCCURRED2_RTC_0_SHFT			18
-#define UV4H_EVENT_OCCURRED2_RTC_1_SHFT			19
-#define UV4H_EVENT_OCCURRED2_RTC_2_SHFT			20
-#define UV4H_EVENT_OCCURRED2_RTC_3_SHFT			21
-#define UV4H_EVENT_OCCURRED2_RTC_4_SHFT			22
-#define UV4H_EVENT_OCCURRED2_RTC_5_SHFT			23
-#define UV4H_EVENT_OCCURRED2_RTC_6_SHFT			24
-#define UV4H_EVENT_OCCURRED2_RTC_7_SHFT			25
-#define UV4H_EVENT_OCCURRED2_RTC_8_SHFT			26
-#define UV4H_EVENT_OCCURRED2_RTC_9_SHFT			27
-#define UV4H_EVENT_OCCURRED2_RTC_10_SHFT		28
-#define UV4H_EVENT_OCCURRED2_RTC_11_SHFT		29
-#define UV4H_EVENT_OCCURRED2_RTC_12_SHFT		30
-#define UV4H_EVENT_OCCURRED2_RTC_13_SHFT		31
-#define UV4H_EVENT_OCCURRED2_RTC_14_SHFT		32
-#define UV4H_EVENT_OCCURRED2_RTC_15_SHFT		33
-#define UV4H_EVENT_OCCURRED2_RTC_16_SHFT		34
-#define UV4H_EVENT_OCCURRED2_RTC_17_SHFT		35
-#define UV4H_EVENT_OCCURRED2_RTC_18_SHFT		36
-#define UV4H_EVENT_OCCURRED2_RTC_19_SHFT		37
-#define UV4H_EVENT_OCCURRED2_RTC_20_SHFT		38
-#define UV4H_EVENT_OCCURRED2_RTC_21_SHFT		39
-#define UV4H_EVENT_OCCURRED2_RTC_22_SHFT		40
-#define UV4H_EVENT_OCCURRED2_RTC_23_SHFT		41
-#define UV4H_EVENT_OCCURRED2_RTC_24_SHFT		42
-#define UV4H_EVENT_OCCURRED2_RTC_25_SHFT		43
-#define UV4H_EVENT_OCCURRED2_RTC_26_SHFT		44
-#define UV4H_EVENT_OCCURRED2_RTC_27_SHFT		45
-#define UV4H_EVENT_OCCURRED2_RTC_28_SHFT		46
-#define UV4H_EVENT_OCCURRED2_RTC_29_SHFT		47
-#define UV4H_EVENT_OCCURRED2_RTC_30_SHFT		48
-#define UV4H_EVENT_OCCURRED2_RTC_31_SHFT		49
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT0_MASK 0x0000000000000001UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT1_MASK 0x0000000000000002UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT2_MASK 0x0000000000000004UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT3_MASK 0x0000000000000008UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT4_MASK 0x0000000000000010UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT5_MASK 0x0000000000000020UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT6_MASK 0x0000000000000040UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT7_MASK 0x0000000000000080UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT8_MASK 0x0000000000000100UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT9_MASK 0x0000000000000200UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT10_MASK 0x0000000000000400UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT11_MASK 0x0000000000000800UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT12_MASK 0x0000000000001000UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT13_MASK 0x0000000000002000UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT14_MASK 0x0000000000004000UL
-#define UV4H_EVENT_OCCURRED2_MESSAGE_ACCELERATOR_INT15_MASK 0x0000000000008000UL
-#define UV4H_EVENT_OCCURRED2_RTC_INTERVAL_INT_MASK	0x0000000000010000UL
-#define UV4H_EVENT_OCCURRED2_BAU_DASHBOARD_INT_MASK	0x0000000000020000UL
-#define UV4H_EVENT_OCCURRED2_RTC_0_MASK			0x0000000000040000UL
-#define UV4H_EVENT_OCCURRED2_RTC_1_MASK			0x0000000000080000UL
-#define UV4H_EVENT_OCCURRED2_RTC_2_MASK			0x0000000000100000UL
-#define UV4H_EVENT_OCCURRED2_RTC_3_MASK			0x0000000000200000UL
-#define UV4H_EVENT_OCCURRED2_RTC_4_MASK			0x0000000000400000UL
-#define UV4H_EVENT_OCCURRED2_RTC_5_MASK			0x0000000000800000UL
-#define UV4H_EVENT_OCCURRED2_RTC_6_MASK			0x0000000001000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_7_MASK			0x0000000002000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_8_MASK			0x0000000004000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_9_MASK			0x0000000008000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_10_MASK		0x0000000010000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_11_MASK		0x0000000020000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_12_MASK		0x0000000040000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_13_MASK		0x0000000080000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_14_MASK		0x0000000100000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_15_MASK		0x0000000200000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_16_MASK		0x0000000400000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_17_MASK		0x0000000800000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_18_MASK		0x0000001000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_19_MASK		0x0000002000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_20_MASK		0x0000004000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_21_MASK		0x0000008000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_22_MASK		0x0000010000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_23_MASK		0x0000020000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_24_MASK		0x0000040000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_25_MASK		0x0000080000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_26_MASK		0x0000100000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_27_MASK		0x0000200000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_28_MASK		0x0000400000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_29_MASK		0x0000800000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_30_MASK		0x0001000000000000UL
-#define UV4H_EVENT_OCCURRED2_RTC_31_MASK		0x0002000000000000UL
 
-#define UVXH_EVENT_OCCURRED2_RTC_1_MASK (				\
-	is_uv2_hub() ? UV2H_EVENT_OCCURRED2_RTC_1_MASK :		\
-	is_uv3_hub() ? UV3H_EVENT_OCCURRED2_RTC_1_MASK :		\
-	/*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_RTC_1_MASK)
 
-union uvh_event_occurred2_u {
+/* UV2 unique defines */
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT	27
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_MASK	0x00003ffff8000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_SHFT	46
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_M_IO_MASK	0x000fc00000000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_SHFT	52
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_N_IO_MASK	0x00f0000000000000UL
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_SHFT	63
+#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_ENABLE_MASK	0x8000000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT (			\
+	is_uv(UV2) ? 27 :						\
+	uv_undefined("UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT"))
+
+union uvh_rh_gam_mmioh_overlay_config_u {
 	unsigned long	v;
-	struct uv2h_event_occurred2_s {
-		unsigned long	rtc_0:1;			/* RW */
-		unsigned long	rtc_1:1;			/* RW */
-		unsigned long	rtc_2:1;			/* RW */
-		unsigned long	rtc_3:1;			/* RW */
-		unsigned long	rtc_4:1;			/* RW */
-		unsigned long	rtc_5:1;			/* RW */
-		unsigned long	rtc_6:1;			/* RW */
-		unsigned long	rtc_7:1;			/* RW */
-		unsigned long	rtc_8:1;			/* RW */
-		unsigned long	rtc_9:1;			/* RW */
-		unsigned long	rtc_10:1;			/* RW */
-		unsigned long	rtc_11:1;			/* RW */
-		unsigned long	rtc_12:1;			/* RW */
-		unsigned long	rtc_13:1;			/* RW */
-		unsigned long	rtc_14:1;			/* RW */
-		unsigned long	rtc_15:1;			/* RW */
-		unsigned long	rtc_16:1;			/* RW */
-		unsigned long	rtc_17:1;			/* RW */
-		unsigned long	rtc_18:1;			/* RW */
-		unsigned long	rtc_19:1;			/* RW */
-		unsigned long	rtc_20:1;			/* RW */
-		unsigned long	rtc_21:1;			/* RW */
-		unsigned long	rtc_22:1;			/* RW */
-		unsigned long	rtc_23:1;			/* RW */
-		unsigned long	rtc_24:1;			/* RW */
-		unsigned long	rtc_25:1;			/* RW */
-		unsigned long	rtc_26:1;			/* RW */
-		unsigned long	rtc_27:1;			/* RW */
-		unsigned long	rtc_28:1;			/* RW */
-		unsigned long	rtc_29:1;			/* RW */
-		unsigned long	rtc_30:1;			/* RW */
-		unsigned long	rtc_31:1;			/* RW */
-		unsigned long	rsvd_32_63:32;
+
+	/* UVH common struct */
+	struct uvh_rh_gam_mmioh_overlay_config_s {
+		unsigned long	rsvd_0_26:27;
+		unsigned long	base:19;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;				/* RW */
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s;
+
+	/* UVXH common struct */
+	struct uvxh_rh_gam_mmioh_overlay_config_s {
+		unsigned long	rsvd_0_26:27;
+		unsigned long	base:19;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;				/* RW */
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} sx;
+
+	/* UV2 unique struct */
+	struct uv2h_rh_gam_mmioh_overlay_config_s {
+		unsigned long	rsvd_0_26:27;
+		unsigned long	base:19;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;				/* RW */
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
 	} s2;
-	struct uv3h_event_occurred2_s {
-		unsigned long	rtc_0:1;			/* RW */
-		unsigned long	rtc_1:1;			/* RW */
-		unsigned long	rtc_2:1;			/* RW */
-		unsigned long	rtc_3:1;			/* RW */
-		unsigned long	rtc_4:1;			/* RW */
-		unsigned long	rtc_5:1;			/* RW */
-		unsigned long	rtc_6:1;			/* RW */
-		unsigned long	rtc_7:1;			/* RW */
-		unsigned long	rtc_8:1;			/* RW */
-		unsigned long	rtc_9:1;			/* RW */
-		unsigned long	rtc_10:1;			/* RW */
-		unsigned long	rtc_11:1;			/* RW */
-		unsigned long	rtc_12:1;			/* RW */
-		unsigned long	rtc_13:1;			/* RW */
-		unsigned long	rtc_14:1;			/* RW */
-		unsigned long	rtc_15:1;			/* RW */
-		unsigned long	rtc_16:1;			/* RW */
-		unsigned long	rtc_17:1;			/* RW */
-		unsigned long	rtc_18:1;			/* RW */
-		unsigned long	rtc_19:1;			/* RW */
-		unsigned long	rtc_20:1;			/* RW */
-		unsigned long	rtc_21:1;			/* RW */
-		unsigned long	rtc_22:1;			/* RW */
-		unsigned long	rtc_23:1;			/* RW */
-		unsigned long	rtc_24:1;			/* RW */
-		unsigned long	rtc_25:1;			/* RW */
-		unsigned long	rtc_26:1;			/* RW */
-		unsigned long	rtc_27:1;			/* RW */
-		unsigned long	rtc_28:1;			/* RW */
-		unsigned long	rtc_29:1;			/* RW */
-		unsigned long	rtc_30:1;			/* RW */
-		unsigned long	rtc_31:1;			/* RW */
-		unsigned long	rsvd_32_63:32;
+};
+
+/* ========================================================================= */
+/*                     UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0                      */
+/* ========================================================================= */
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0 (				\
+	is_uv(UV4) ? 0x483000UL :					\
+	is_uv(UV3) ? 0x1603000UL :					\
+	0)
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT	26
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK	0x000ffffffc000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT	52
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK	0x03f0000000000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT	63
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK	0x8000000000000000UL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT	26
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK	0x00003ffffc000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT	46
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK	0x000fc00000000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT	63
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK	0x8000000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT	26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK	0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT	46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK	0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_SHFT	63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK	0x8000000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK (			\
+	is_uv(UV4A) ? 0x000ffffffc000000UL :				\
+	is_uv(UV4) ? 0x00003ffffc000000UL :				\
+	is_uv(UV3) ? 0x00003ffffc000000UL :				\
+	0)
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT (			\
+	is_uv(UV4) ? 26 :						\
+	is_uv(UV3) ? 26 :						\
+	-1)
+
+union uvh_rh_gam_mmioh_overlay_config0_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_rh_gam_mmioh_overlay_config0_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s;
+
+	/* UVXH common struct */
+	struct uvxh_rh_gam_mmioh_overlay_config0_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} sx;
+
+	/* UV4A unique struct */
+	struct uv4ah_rh_gam_mmioh_overlay_config0_mmr_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:26;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	undef_62:1;			/* Undefined */
+		unsigned long	enable:1;			/* RW */
+	} s4a;
+
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_mmioh_overlay_config0_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_mmioh_overlay_config0_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
 	} s3;
-	struct uv4h_event_occurred2_s {
-		unsigned long	message_accelerator_int0:1;	/* RW */
-		unsigned long	message_accelerator_int1:1;	/* RW */
-		unsigned long	message_accelerator_int2:1;	/* RW */
-		unsigned long	message_accelerator_int3:1;	/* RW */
-		unsigned long	message_accelerator_int4:1;	/* RW */
-		unsigned long	message_accelerator_int5:1;	/* RW */
-		unsigned long	message_accelerator_int6:1;	/* RW */
-		unsigned long	message_accelerator_int7:1;	/* RW */
-		unsigned long	message_accelerator_int8:1;	/* RW */
-		unsigned long	message_accelerator_int9:1;	/* RW */
-		unsigned long	message_accelerator_int10:1;	/* RW */
-		unsigned long	message_accelerator_int11:1;	/* RW */
-		unsigned long	message_accelerator_int12:1;	/* RW */
-		unsigned long	message_accelerator_int13:1;	/* RW */
-		unsigned long	message_accelerator_int14:1;	/* RW */
-		unsigned long	message_accelerator_int15:1;	/* RW */
-		unsigned long	rtc_interval_int:1;		/* RW */
-		unsigned long	bau_dashboard_int:1;		/* RW */
-		unsigned long	rtc_0:1;			/* RW */
-		unsigned long	rtc_1:1;			/* RW */
-		unsigned long	rtc_2:1;			/* RW */
-		unsigned long	rtc_3:1;			/* RW */
-		unsigned long	rtc_4:1;			/* RW */
-		unsigned long	rtc_5:1;			/* RW */
-		unsigned long	rtc_6:1;			/* RW */
-		unsigned long	rtc_7:1;			/* RW */
-		unsigned long	rtc_8:1;			/* RW */
-		unsigned long	rtc_9:1;			/* RW */
-		unsigned long	rtc_10:1;			/* RW */
-		unsigned long	rtc_11:1;			/* RW */
-		unsigned long	rtc_12:1;			/* RW */
-		unsigned long	rtc_13:1;			/* RW */
-		unsigned long	rtc_14:1;			/* RW */
-		unsigned long	rtc_15:1;			/* RW */
-		unsigned long	rtc_16:1;			/* RW */
-		unsigned long	rtc_17:1;			/* RW */
-		unsigned long	rtc_18:1;			/* RW */
-		unsigned long	rtc_19:1;			/* RW */
-		unsigned long	rtc_20:1;			/* RW */
-		unsigned long	rtc_21:1;			/* RW */
-		unsigned long	rtc_22:1;			/* RW */
-		unsigned long	rtc_23:1;			/* RW */
-		unsigned long	rtc_24:1;			/* RW */
-		unsigned long	rtc_25:1;			/* RW */
-		unsigned long	rtc_26:1;			/* RW */
-		unsigned long	rtc_27:1;			/* RW */
-		unsigned long	rtc_28:1;			/* RW */
-		unsigned long	rtc_29:1;			/* RW */
-		unsigned long	rtc_30:1;			/* RW */
-		unsigned long	rtc_31:1;			/* RW */
-		unsigned long	rsvd_50_63:14;
+};
+
+/* ========================================================================= */
+/*                     UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1                      */
+/* ========================================================================= */
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1 (				\
+	is_uv(UV4) ? 0x484000UL :					\
+	is_uv(UV3) ? 0x1604000UL :					\
+	0)
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT	26
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK	0x000ffffffc000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT	52
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK	0x03f0000000000000UL
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT	63
+#define UV4AH_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK	0x8000000000000000UL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT	26
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK	0x00003ffffc000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT	46
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK	0x000fc00000000000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT	63
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK	0x8000000000000000UL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT	26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK	0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT	46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK	0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_SHFT	63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_ENABLE_MASK	0x8000000000000000UL
+
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK (			\
+	is_uv(UV4A) ? 0x000ffffffc000000UL : \
+	is_uv(UV4) ? 0x00003ffffc000000UL :				\
+	is_uv(UV3) ? 0x00003ffffc000000UL :				\
+	0)
+#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT (			\
+	is_uv(UV4) ? 26 :						\
+	is_uv(UV3) ? 26 :						\
+	-1)
+
+union uvh_rh_gam_mmioh_overlay_config1_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_rh_gam_mmioh_overlay_config1_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s;
+
+	/* UVXH common struct */
+	struct uvxh_rh_gam_mmioh_overlay_config1_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} sx;
+
+	/* UV4A unique struct */
+	struct uv4ah_rh_gam_mmioh_overlay_config1_mmr_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:26;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	undef_62:1;			/* Undefined */
+		unsigned long	enable:1;			/* RW */
+	} s4a;
+
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_mmioh_overlay_config1_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
 	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_mmioh_overlay_config1_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s3;
 };
 
 /* ========================================================================= */
-/*                       UVXH_EVENT_OCCURRED2_ALIAS                          */
+/*                    UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0                      */
 /* ========================================================================= */
-#define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0 (				\
+	is_uv(UV4) ? 0x483800UL :					\
+	is_uv(UV3) ? 0x1603800UL :					\
+	0)
 
-#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70
-#define UV3H_EVENT_OCCURRED2_ALIAS_32 0xb70
-#define UV4H_EVENT_OCCURRED2_ALIAS_32 0x610
-#define UVH_EVENT_OCCURRED2_ALIAS_32 (					\
-	is_uv2_hub() ? UV2H_EVENT_OCCURRED2_ALIAS_32 :			\
-	is_uv3_hub() ? UV3H_EVENT_OCCURRED2_ALIAS_32 :			\
-	/*is_uv4_hub*/ UV4H_EVENT_OCCURRED2_ALIAS_32)
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH (			\
+	is_uv(UV4) ? 128 :						\
+	is_uv(UV3) ? 128 :						\
+	0)
 
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT	0
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK	0x0000000000000fffUL
 
-/* ========================================================================= */
-/*                   UVXH_LB_BAU_SB_ACTIVATION_STATUS_2                      */
-/* ========================================================================= */
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2 0xc8130UL
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2 (				\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 :		\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2)
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT	0
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK	0x0000000000007fffUL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT	0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK	0x0000000000007fffUL
 
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0xa10
-#define UVH_LB_BAU_SB_ACTIVATION_STATUS_2_32 (				\
-	is_uv2_hub() ? UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 :		\
-	is_uv3_hub() ? UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 :		\
-	/*is_uv4_hub*/ UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_32)
 
-#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+union uvh_rh_gam_mmioh_redirect_config0_u {
+	unsigned long	v;
 
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+	/* UVH common struct */
+	struct uvh_rh_gam_mmioh_redirect_config0_s {
+		unsigned long	nasid:15;			/* RW */
+		unsigned long	rsvd_15_63:49;
+	} s;
 
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+	/* UVXH common struct */
+	struct uvxh_rh_gam_mmioh_redirect_config0_s {
+		unsigned long	nasid:15;			/* RW */
+		unsigned long	rsvd_15_63:49;
+	} sx;
 
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0
-#define UV4H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL
+	struct uv4ah_rh_gam_mmioh_redirect_config0_s {
+		unsigned long	nasid:12;			/* RW */
+		unsigned long	rsvd_12_63:52;
+	} s4a;
 
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_mmioh_redirect_config0_s {
+		unsigned long	nasid:15;			/* RW */
+		unsigned long	rsvd_15_63:49;
+	} s4;
 
-union uvxh_lb_bau_sb_activation_status_2_u {
-	unsigned long	v;
-	struct uvxh_lb_bau_sb_activation_status_2_s {
-		unsigned long	aux_error:64;			/* RW */
-	} sx;
-	struct uv2h_lb_bau_sb_activation_status_2_s {
-		unsigned long	aux_error:64;			/* RW */
-	} s2;
-	struct uv3h_lb_bau_sb_activation_status_2_s {
-		unsigned long	aux_error:64;			/* RW */
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_mmioh_redirect_config0_s {
+		unsigned long	nasid:15;			/* RW */
+		unsigned long	rsvd_15_63:49;
 	} s3;
-	struct uv4h_lb_bau_sb_activation_status_2_s {
-		unsigned long	aux_error:64;			/* RW */
-	} s4;
 };
 
 /* ========================================================================= */
-/*                          UV3H_GR0_GAM_GR_CONFIG                           */
+/*                    UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1                      */
 /* ========================================================================= */
-#define UV3H_GR0_GAM_GR_CONFIG				0xc00028UL
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1 (				\
+	is_uv(UV4) ? 0x484800UL :					\
+	is_uv(UV3) ? 0x1604800UL :					\
+	0)
 
-#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_SHFT		0
-#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_SHFT		10
-#define UV3H_GR0_GAM_GR_CONFIG_M_SKT_MASK		0x000000000000003fUL
-#define UV3H_GR0_GAM_GR_CONFIG_SUBSPACE_MASK		0x0000000000000400UL
+#define UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH (			\
+	is_uv(UV4) ? 128 :						\
+	is_uv(UV3) ? 128 :						\
+	0)
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_SHFT	0
+#define UV4AH_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK	0x0000000000000fffUL
+
+/* UV4 unique defines */
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT	0
+#define UV4H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK	0x0000000000007fffUL
+
+/* UV3 unique defines */
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_SHFT	0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK	0x0000000000007fffUL
 
-union uv3h_gr0_gam_gr_config_u {
+
+union uvh_rh_gam_mmioh_redirect_config1_u {
 	unsigned long	v;
-	struct uv3h_gr0_gam_gr_config_s {
-		unsigned long	m_skt:6;			/* RW */
-		unsigned long	undef_6_9:4;			/* Undefined */
-		unsigned long	subspace:1;			/* RW */
-		unsigned long	reserved:53;
+
+	/* UVH common struct */
+	struct uvh_rh_gam_mmioh_redirect_config1_s {
+		unsigned long	nasid:15;			/* RW */
+		unsigned long	rsvd_15_63:49;
+	} s;
+
+	/* UVXH common struct */
+	struct uvxh_rh_gam_mmioh_redirect_config1_s {
+		unsigned long	nasid:15;			/* RW */
+		unsigned long	rsvd_15_63:49;
+	} sx;
+
+	struct uv4ah_rh_gam_mmioh_redirect_config1_s {
+		unsigned long	nasid:12;			/* RW */
+		unsigned long	rsvd_12_63:52;
+	} s4a;
+
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_mmioh_redirect_config1_s {
+		unsigned long	nasid:15;			/* RW */
+		unsigned long	rsvd_15_63:49;
+	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_mmioh_redirect_config1_s {
+		unsigned long	nasid:15;			/* RW */
+		unsigned long	rsvd_15_63:49;
 	} s3;
 };
 
 /* ========================================================================= */
-/*                       UV4H_LB_PROC_INTD_QUEUE_FIRST                       */
+/*                      UVH_RH_GAM_MMR_OVERLAY_CONFIG                        */
 /* ========================================================================= */
-#define UV4H_LB_PROC_INTD_QUEUE_FIRST			0xa4100UL
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG (					\
+	is_uv(UV4) ? 0x480028UL :					\
+	is_uv(UV3) ? 0x1600028UL :					\
+	is_uv(UV2) ? 0x1600028UL :					\
+	0)
+
+
+/* UVXH common defines */
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT	26
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK (			\
+	is_uv(UV4A) ? 0x000ffffffc000000UL :				\
+	is_uv(UV4) ? 0x00003ffffc000000UL :				\
+	is_uv(UV3) ? 0x00003ffffc000000UL :				\
+	is_uv(UV2) ? 0x00003ffffc000000UL :				\
+	0)
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_SHFT	63
+#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_ENABLE_MASK	0x8000000000000000UL
+
+/* UV4A unique defines */
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT	26
+#define UV4AH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK	0x000ffffffc000000UL
+
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_MASK (			\
+	is_uv(UV4A) ? 0x000ffffffc000000UL :				\
+	is_uv(UV4) ? 0x00003ffffc000000UL :				\
+	is_uv(UV3) ? 0x00003ffffc000000UL :				\
+	is_uv(UV2) ? 0x00003ffffc000000UL :				\
+	0)
+
+#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT (			\
+	is_uv(UV4) ? 26 :						\
+	is_uv(UV3) ? 26 :						\
+	is_uv(UV2) ? 26 :						\
+	-1)
+
+union uvh_rh_gam_mmr_overlay_config_u {
+	unsigned long	v;
+
+	/* UVH common struct */
+	struct uvh_rh_gam_mmr_overlay_config_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	rsvd_46_62:17;
+		unsigned long	enable:1;			/* RW */
+	} s;
 
-#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_SHFT 6
-#define UV4H_LB_PROC_INTD_QUEUE_FIRST_FIRST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffc0UL
+	/* UVXH common struct */
+	struct uvxh_rh_gam_mmr_overlay_config_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	rsvd_46_62:17;
+		unsigned long	enable:1;			/* RW */
+	} sx;
 
-union uv4h_lb_proc_intd_queue_first_u {
-	unsigned long	v;
-	struct uv4h_lb_proc_intd_queue_first_s {
-		unsigned long	undef_0_5:6;			/* Undefined */
-		unsigned long	first_payload_address:40;	/* RW */
+	/* UV4 unique struct */
+	struct uv4h_rh_gam_mmr_overlay_config_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	rsvd_46_62:17;
+		unsigned long	enable:1;			/* RW */
 	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rh_gam_mmr_overlay_config_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	rsvd_46_62:17;
+		unsigned long	enable:1;			/* RW */
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_rh_gam_mmr_overlay_config_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	rsvd_46_62:17;
+		unsigned long	enable:1;			/* RW */
+	} s2;
 };
 
 /* ========================================================================= */
-/*                       UV4H_LB_PROC_INTD_QUEUE_LAST                        */
+/*                                 UVH_RTC                                   */
 /* ========================================================================= */
-#define UV4H_LB_PROC_INTD_QUEUE_LAST			0xa4108UL
+#define UVH_RTC (							\
+	is_uv(UV5) ? 0xe0000UL :					\
+	is_uv(UV4) ? 0xe0000UL :					\
+	is_uv(UV3) ? 0x340000UL :					\
+	is_uv(UV2) ? 0x340000UL :					\
+	0)
+
+/* UVH common defines*/
+#define UVH_RTC_REAL_TIME_CLOCK_SHFT			0
+#define UVH_RTC_REAL_TIME_CLOCK_MASK			0x00ffffffffffffffUL
 
-#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_SHFT 5
-#define UV4H_LB_PROC_INTD_QUEUE_LAST_LAST_PAYLOAD_ADDRESS_MASK 0x00003fffffffffe0UL
 
-union uv4h_lb_proc_intd_queue_last_u {
+union uvh_rtc_u {
 	unsigned long	v;
-	struct uv4h_lb_proc_intd_queue_last_s {
-		unsigned long	undef_0_4:5;			/* Undefined */
-		unsigned long	last_payload_address:41;	/* RW */
+
+	/* UVH common struct */
+	struct uvh_rtc_s {
+		unsigned long	real_time_clock:56;		/* RW */
+		unsigned long	rsvd_56_63:8;
+	} s;
+
+	/* UV5 unique struct */
+	struct uv5h_rtc_s {
+		unsigned long	real_time_clock:56;		/* RW */
+		unsigned long	rsvd_56_63:8;
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_rtc_s {
+		unsigned long	real_time_clock:56;		/* RW */
+		unsigned long	rsvd_56_63:8;
 	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rtc_s {
+		unsigned long	real_time_clock:56;		/* RW */
+		unsigned long	rsvd_56_63:8;
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_rtc_s {
+		unsigned long	real_time_clock:56;		/* RW */
+		unsigned long	rsvd_56_63:8;
+	} s2;
 };
 
 /* ========================================================================= */
-/*                     UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR                      */
+/*                           UVH_RTC1_INT_CONFIG                             */
 /* ========================================================================= */
-#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR		0xa4118UL
+#define UVH_RTC1_INT_CONFIG 0x615c0UL
+
+/* UVH common defines*/
+#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT			0
+#define UVH_RTC1_INT_CONFIG_VECTOR_MASK			0x00000000000000ffUL
+#define UVH_RTC1_INT_CONFIG_DM_SHFT			8
+#define UVH_RTC1_INT_CONFIG_DM_MASK			0x0000000000000700UL
+#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT		11
+#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK		0x0000000000000800UL
+#define UVH_RTC1_INT_CONFIG_STATUS_SHFT			12
+#define UVH_RTC1_INT_CONFIG_STATUS_MASK			0x0000000000001000UL
+#define UVH_RTC1_INT_CONFIG_P_SHFT			13
+#define UVH_RTC1_INT_CONFIG_P_MASK			0x0000000000002000UL
+#define UVH_RTC1_INT_CONFIG_T_SHFT			15
+#define UVH_RTC1_INT_CONFIG_T_MASK			0x0000000000008000UL
+#define UVH_RTC1_INT_CONFIG_M_SHFT			16
+#define UVH_RTC1_INT_CONFIG_M_MASK			0x0000000000010000UL
+#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT		32
+#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK		0xffffffff00000000UL
 
-#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_SHFT 0
-#define UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR_SOFT_ACK_PENDING_FLAGS_MASK 0x00000000000000ffUL
 
-union uv4h_lb_proc_intd_soft_ack_clear_u {
+union uvh_rtc1_int_config_u {
 	unsigned long	v;
-	struct uv4h_lb_proc_intd_soft_ack_clear_s {
-		unsigned long	soft_ack_pending_flags:8;	/* WP */
+
+	/* UVH common struct */
+	struct uvh_rtc1_int_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s;
+
+	/* UV5 unique struct */
+	struct uv5h_rtc1_int_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_rtc1_int_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
 	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_rtc1_int_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_rtc1_int_config_s {
+		unsigned long	vector_:8;			/* RW */
+		unsigned long	dm:3;				/* RW */
+		unsigned long	destmode:1;			/* RW */
+		unsigned long	status:1;			/* RO */
+		unsigned long	p:1;				/* RO */
+		unsigned long	rsvd_14:1;
+		unsigned long	t:1;				/* RO */
+		unsigned long	m:1;				/* RW */
+		unsigned long	rsvd_17_31:15;
+		unsigned long	apic_id:32;			/* RW */
+	} s2;
 };
 
 /* ========================================================================= */
-/*                    UV4H_LB_PROC_INTD_SOFT_ACK_PENDING                     */
+/*                               UVH_SCRATCH5                                */
 /* ========================================================================= */
-#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING		0xa4110UL
+#define UVH_SCRATCH5 (							\
+	is_uv(UV5) ? 0xb0200UL :					\
+	is_uv(UV4) ? 0xb0200UL :					\
+	is_uv(UV3) ? 0x2d0200UL :					\
+	is_uv(UV2) ? 0x2d0200UL :					\
+	0)
+#define UV5H_SCRATCH5 0xb0200UL
+#define UV4H_SCRATCH5 0xb0200UL
+#define UV3H_SCRATCH5 0x2d0200UL
+#define UV2H_SCRATCH5 0x2d0200UL
+
+/* UVH common defines*/
+#define UVH_SCRATCH5_SCRATCH5_SHFT			0
+#define UVH_SCRATCH5_SCRATCH5_MASK			0xffffffffffffffffUL
+
+/* UVXH common defines */
+#define UVXH_SCRATCH5_SCRATCH5_SHFT			0
+#define UVXH_SCRATCH5_SCRATCH5_MASK			0xffffffffffffffffUL
+
+/* UVYH common defines */
+#define UVYH_SCRATCH5_SCRATCH5_SHFT			0
+#define UVYH_SCRATCH5_SCRATCH5_MASK			0xffffffffffffffffUL
+
+/* UV5 unique defines */
+#define UV5H_SCRATCH5_SCRATCH5_SHFT			0
+#define UV5H_SCRATCH5_SCRATCH5_MASK			0xffffffffffffffffUL
+
+/* UV4 unique defines */
+#define UV4H_SCRATCH5_SCRATCH5_SHFT			0
+#define UV4H_SCRATCH5_SCRATCH5_MASK			0xffffffffffffffffUL
+
+/* UV3 unique defines */
+#define UV3H_SCRATCH5_SCRATCH5_SHFT			0
+#define UV3H_SCRATCH5_SCRATCH5_MASK			0xffffffffffffffffUL
 
-#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_SHFT 0
-#define UV4H_LB_PROC_INTD_SOFT_ACK_PENDING_SOFT_ACK_FLAGS_MASK 0x00000000000000ffUL
+/* UV2 unique defines */
+#define UV2H_SCRATCH5_SCRATCH5_SHFT			0
+#define UV2H_SCRATCH5_SCRATCH5_MASK			0xffffffffffffffffUL
 
-union uv4h_lb_proc_intd_soft_ack_pending_u {
+
+union uvh_scratch5_u {
 	unsigned long	v;
-	struct uv4h_lb_proc_intd_soft_ack_pending_s {
-		unsigned long	soft_ack_flags:8;		/* RW */
+
+	/* UVH common struct */
+	struct uvh_scratch5_s {
+		unsigned long	scratch5:64;			/* RW */
+	} s;
+
+	/* UVXH common struct */
+	struct uvxh_scratch5_s {
+		unsigned long	scratch5:64;			/* RW */
+	} sx;
+
+	/* UVYH common struct */
+	struct uvyh_scratch5_s {
+		unsigned long	scratch5:64;			/* RW */
+	} sy;
+
+	/* UV5 unique struct */
+	struct uv5h_scratch5_s {
+		unsigned long	scratch5:64;			/* RW */
+	} s5;
+
+	/* UV4 unique struct */
+	struct uv4h_scratch5_s {
+		unsigned long	scratch5:64;			/* RW */
 	} s4;
+
+	/* UV3 unique struct */
+	struct uv3h_scratch5_s {
+		unsigned long	scratch5:64;			/* RW */
+	} s3;
+
+	/* UV2 unique struct */
+	struct uv2h_scratch5_s {
+		unsigned long	scratch5:64;			/* RW */
+	} s2;
 };
 
+/* ========================================================================= */
+/*                            UVH_SCRATCH5_ALIAS                             */
+/* ========================================================================= */
+#define UVH_SCRATCH5_ALIAS (						\
+	is_uv(UV5) ? 0xb0208UL :					\
+	is_uv(UV4) ? 0xb0208UL :					\
+	is_uv(UV3) ? 0x2d0208UL :					\
+	is_uv(UV2) ? 0x2d0208UL :					\
+	0)
+#define UV5H_SCRATCH5_ALIAS 0xb0208UL
+#define UV4H_SCRATCH5_ALIAS 0xb0208UL
+#define UV3H_SCRATCH5_ALIAS 0x2d0208UL
+#define UV2H_SCRATCH5_ALIAS 0x2d0208UL
+
+
+/* ========================================================================= */
+/*                           UVH_SCRATCH5_ALIAS_2                            */
+/* ========================================================================= */
+#define UVH_SCRATCH5_ALIAS_2 (						\
+	is_uv(UV5) ? 0xb0210UL :					\
+	is_uv(UV4) ? 0xb0210UL :					\
+	is_uv(UV3) ? 0x2d0210UL :					\
+	is_uv(UV2) ? 0x2d0210UL :					\
+	0)
+#define UV5H_SCRATCH5_ALIAS_2 0xb0210UL
+#define UV4H_SCRATCH5_ALIAS_2 0xb0210UL
+#define UV3H_SCRATCH5_ALIAS_2 0x2d0210UL
+#define UV2H_SCRATCH5_ALIAS_2 0x2d0210UL
+
+
 
 #endif /* _ASM_X86_UV_UV_MMRS_H */
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f51fabf5601034..d357711072a186 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -29,6 +29,7 @@ static int			uv_hubbed_system;
 static int			uv_hubless_system;
 static u64			gru_start_paddr, gru_end_paddr;
 static union uvh_apicid		uvh_apicid;
+static int			uv_node_id;
 
 /* Unpack OEM/TABLE ID's to be NULL terminated strings */
 static u8 oem_id[ACPI_OEM_ID_SIZE + 1];
@@ -85,43 +86,73 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
 	return is_ISA_range(start, end) || is_GRU_range(start, end);
 }
 
-static int __init early_get_pnodeid(void)
+static void __init early_get_pnodeid(void)
 {
-	union uvh_node_id_u node_id;
-	union uvh_rh_gam_config_mmr_u  m_n_config;
+	union uvh_rh_gam_addr_map_config_u  m_n_config;
 	int pnode;
 
-	/* Currently, all blades have same revision number */
+	m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG);
+
+	if (is_uv4_hub())
+		uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */
+
+	uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1;
+	pnode = (uv_node_id >> 1) & uv_cpuid.pnode_mask;
+	uv_cpuid.gpa_shift = 46;	/* Default unless changed */
+
+	pr_info("UV: n_skt:%d pnmsk:%x pn:%x\n",
+		m_n_config.s.n_skt, uv_cpuid.pnode_mask, pnode);
+}
+
+/* Running on a UV Hubbed system, determine which UV Hub Type it is */
+static int __init early_set_hub_type(void)
+{
+	union uvh_node_id_u node_id;
+
+	/*
+	 * The NODE_ID MMR is always at offset 0.
+	 * Contains the chip part # + revision.
+	 * Node_id field started with 15 bits,
+	 * ... now 7 but upper 8 are masked to 0.
+	 * All blades/nodes have the same part # and hub revision.
+	 */
 	node_id.v = uv_early_read_mmr(UVH_NODE_ID);
-	m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
-	uv_min_hub_revision_id = node_id.s.revision;
+	uv_node_id = node_id.sx.node_id;
 
 	switch (node_id.s.part_number) {
-	case UV2_HUB_PART_NUMBER:
-	case UV2_HUB_PART_NUMBER_X:
-		uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+
+	/* UV4/4A only have a revision difference */
+	case UV4_HUB_PART_NUMBER:
+		uv_min_hub_revision_id = node_id.s.revision
+					 + UV4_HUB_REVISION_BASE;
+		uv_hub_type_set(UV4);
+		if (uv_min_hub_revision_id == UV4A_HUB_REVISION_BASE)
+			uv_hub_type_set(UV4|UV4A);
 		break;
+
 	case UV3_HUB_PART_NUMBER:
 	case UV3_HUB_PART_NUMBER_X:
-		uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
+		uv_min_hub_revision_id = node_id.s.revision
+					 + UV3_HUB_REVISION_BASE;
+		uv_hub_type_set(UV3);
 		break;
 
-	/* Update: UV4A has only a modified revision to indicate HUB fixes */
-	case UV4_HUB_PART_NUMBER:
-		uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1;
-		uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */
+	case UV2_HUB_PART_NUMBER:
+	case UV2_HUB_PART_NUMBER_X:
+		uv_min_hub_revision_id = node_id.s.revision
+					 + UV2_HUB_REVISION_BASE - 1;
+		uv_hub_type_set(UV2);
 		break;
+
+	default:
+		return 0;
 	}
 
-	uv_hub_info->hub_revision = uv_min_hub_revision_id;
-	uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1;
-	pnode = (node_id.s.node_id >> 1) & uv_cpuid.pnode_mask;
-	uv_cpuid.gpa_shift = 46;	/* Default unless changed */
+	pr_info("UV: part#:%x rev:%d rev_id:%d UVtype:0x%x\n",
+		node_id.s.part_number, node_id.s.revision,
+		uv_min_hub_revision_id, is_uv(~0));
 
-	pr_info("UV: rev:%d part#:%x nodeid:%04x n_skt:%d pnmsk:%x pn:%x\n",
-		node_id.s.revision, node_id.s.part_number, node_id.s.node_id,
-		m_n_config.s.n_skt, uv_cpuid.pnode_mask, pnode);
-	return pnode;
+	return 1;
 }
 
 static void __init uv_tsc_check_sync(void)
@@ -221,29 +252,26 @@ static void __init uv_stringify(int len, char *to, char *from)
 	strncpy(to, from, len-1);
 }
 
-static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
+static int __init uv_set_system_type(char *_oem_id)
 {
-	int pnodeid;
-	int uv_apic;
-
+	/* Save OEM ID */
 	uv_stringify(sizeof(oem_id), oem_id, _oem_id);
-	uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
 
+	/* Set hubless type if true */
 	if (strncmp(oem_id, "SGI", 3) != 0) {
 		if (strncmp(oem_id, "NSGI", 4) != 0)
 			return 0;
 
-		/* UV4 Hubless, CH, (0x11:UV4+Any) */
+		/* UV4 Hubless: CH */
 		if (strncmp(oem_id, "NSGI4", 5) == 0)
 			uv_hubless_system = 0x11;
 
-		/* UV3 Hubless, UV300/MC990X w/o hub (0x9:UV3+Any) */
+		/* UV3 Hubless: UV300/MC990X w/o hub */
 		else
 			uv_hubless_system = 0x9;
 
-		pr_info("UV: OEM IDs %s/%s, HUBLESS(0x%x)\n",
-			oem_id, oem_table_id, uv_hubless_system);
-
+		pr_info("UV: OEM IDs %s/%s, SystemType %d, HUBLESS ID %x\n",
+			oem_id, oem_table_id, uv_system_type, uv_hubless_system);
 		return 0;
 	}
 
@@ -252,60 +280,78 @@ static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
 		return 0;
 	}
 
-	/* Set up early hub type field in uv_hub_info for Node 0 */
-	uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
+	/* Set hubbed type if true */
+	uv_hub_info->hub_revision =
+		!strncmp(oem_id, "SGI4", 4) ? UV4_HUB_REVISION_BASE :
+		!strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
+		!strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE : 0;
 
-	/*
-	 * Determine UV arch type.
-	 *   SGI2: UV2000/3000
-	 *   SGI3: UV300 (truncated to 4 chars because of different varieties)
-	 *   SGI4: UV400 (truncated to 4 chars because of different varieties)
-	 */
-	if (!strncmp(oem_id, "SGI4", 4)) {
-		uv_hub_info->hub_revision = UV4_HUB_REVISION_BASE;
+	switch (uv_hub_info->hub_revision) {
+	case UV4_HUB_REVISION_BASE:
 		uv_hubbed_system = 0x11;
+		uv_hub_type_set(UV4);
+		break;
 
-	} else if (!strncmp(oem_id, "SGI3", 4)) {
-		uv_hub_info->hub_revision = UV3_HUB_REVISION_BASE;
+	case UV3_HUB_REVISION_BASE:
 		uv_hubbed_system = 0x9;
+		uv_hub_type_set(UV3);
+		break;
 
-	} else if (!strcmp(oem_id, "SGI2")) {
-		uv_hub_info->hub_revision = UV2_HUB_REVISION_BASE;
+	case UV2_HUB_REVISION_BASE:
 		uv_hubbed_system = 0x5;
+		uv_hub_type_set(UV2);
+		break;
 
-	} else {
-		uv_hub_info->hub_revision = 0;
-		goto badbios;
+	default:
+		return 0;
 	}
 
-	pnodeid = early_get_pnodeid();
-	early_get_apic_socketid_shift();
+	/* Get UV hub chip part number & revision */
+	early_set_hub_type();
 
+	/* Other UV setup functions */
+	early_get_pnodeid();
+	early_get_apic_socketid_shift();
 	x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
 	x86_platform.nmi_init = uv_nmi_init;
+	uv_tsc_check_sync();
+
+	return 1;
+}
 
-	if (!strcmp(oem_table_id, "UVX")) {
-		/* This is the most common hardware variant: */
+/* Called early to probe for the correct APIC driver */
+static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
+{
+	/* Set up early hub info fields for Node 0 */
+	uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
+
+	/* If not UV, return. */
+	if (likely(uv_set_system_type(_oem_id) == 0))
+		return 0;
+
+	/* Save and Decode OEM Table ID */
+	uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
+
+	/* This is the most common hardware variant, x2apic mode */
+	if (!strcmp(oem_table_id, "UVX"))
 		uv_system_type = UV_X2APIC;
-		uv_apic = 0;
 
-	} else if (!strcmp(oem_table_id, "UVL")) {
-		/* Only used for very small systems:  */
+	/* Only used for very small systems, usually 1 chassis, legacy mode  */
+	else if (!strcmp(oem_table_id, "UVL"))
 		uv_system_type = UV_LEGACY_APIC;
-		uv_apic = 0;
 
-	} else {
+	else
 		goto badbios;
-	}
 
-	pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", oem_id, oem_table_id, uv_system_type, uv_min_hub_revision_id, uv_apic);
-	uv_tsc_check_sync();
+	pr_info("UV: OEM IDs %s/%s, System/UVType %d/0x%x, HUB RevID %d\n",
+		oem_id, oem_table_id, uv_system_type, is_uv(UV_ANY),
+		uv_min_hub_revision_id);
 
-	return uv_apic;
+	return 0;
 
 badbios:
 	pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id);
-	pr_err("Current UV Type or BIOS not supported\n");
+	pr_err("UV: Current UV Type or BIOS not supported\n");
 	BUG();
 }
 
@@ -673,12 +719,12 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
 };
 
 #define	UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH	3
-#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
+#define DEST_SHIFT UVXH_RH_GAM_ALIAS_0_REDIRECT_CONFIG_DEST_BASE_SHFT
 
 static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
 {
-	union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
-	union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+	union uvh_rh_gam_alias_2_overlay_config_u alias;
+	union uvh_rh_gam_alias_2_redirect_config_u redirect;
 	unsigned long m_redirect;
 	unsigned long m_overlay;
 	int i;
@@ -686,16 +732,16 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
 	for (i = 0; i < UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_LENGTH; i++) {
 		switch (i) {
 		case 0:
-			m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR;
-			m_overlay  = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR;
+			m_redirect = UVH_RH_GAM_ALIAS_0_REDIRECT_CONFIG;
+			m_overlay  = UVH_RH_GAM_ALIAS_0_OVERLAY_CONFIG;
 			break;
 		case 1:
-			m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR;
-			m_overlay  = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR;
+			m_redirect = UVH_RH_GAM_ALIAS_1_REDIRECT_CONFIG;
+			m_overlay  = UVH_RH_GAM_ALIAS_1_OVERLAY_CONFIG;
 			break;
 		case 2:
-			m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR;
-			m_overlay  = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR;
+			m_redirect = UVH_RH_GAM_ALIAS_2_REDIRECT_CONFIG;
+			m_overlay  = UVH_RH_GAM_ALIAS_2_OVERLAY_CONFIG;
 			break;
 		}
 		alias.v = uv_read_local_mmr(m_overlay);
@@ -730,12 +776,12 @@ static __init void map_high(char *id, unsigned long base, int pshift, int bshift
 
 static __init void map_gru_high(int max_pnode)
 {
-	union uvh_rh_gam_gru_overlay_config_mmr_u gru;
-	int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
-	unsigned long mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK;
+	union uvh_rh_gam_gru_overlay_config_u gru;
+	int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
+	unsigned long mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
 	unsigned long base;
 
-	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
+	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG);
 	if (!gru.s.enable) {
 		pr_info("UV: GRU disabled\n");
 		return;
@@ -749,10 +795,10 @@ static __init void map_gru_high(int max_pnode)
 
 static __init void map_mmr_high(int max_pnode)
 {
-	union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
-	int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+	union uvh_rh_gam_mmr_overlay_config_u mmr;
+	int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
 
-	mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
+	mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG);
 	if (mmr.s.enable)
 		map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
 	else
@@ -773,29 +819,29 @@ static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
 
 	if (index == 0) {
 		id = "MMIOH0";
-		m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR;
+		m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0;
 		overlay = uv_read_local_mmr(m_overlay);
-		base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK;
-		mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR;
-		m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK)
-			>> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT;
-		shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT;
-		n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
-		nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK;
+		base = overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
+		mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0;
+		m_io = (overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK)
+			>> UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT;
+		shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT;
+		n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
+		nasid_mask = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK;
 	} else {
 		id = "MMIOH1";
-		m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR;
+		m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1;
 		overlay = uv_read_local_mmr(m_overlay);
-		base = overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK;
-		mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR;
-		m_io = (overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK)
-			>> UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT;
-		shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT;
-		n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH;
-		nasid_mask = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK;
+		base = overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK;
+		mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1;
+		m_io = (overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK)
+			>> UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT;
+		shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT;
+		n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;
+		nasid_mask = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK;
 	}
 	pr_info("UV: %s overlay 0x%lx base:0x%lx m_io:%d\n", id, overlay, base, m_io);
-	if (!(overlay & UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK)) {
+	if (!(overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK)) {
 		pr_info("UV: %s disabled\n", id);
 		return;
 	}
@@ -855,7 +901,7 @@ static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
 
 static __init void map_mmioh_high(int min_pnode, int max_pnode)
 {
-	union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
+	union uvh_rh_gam_mmioh_overlay_config_u mmioh;
 	unsigned long mmr, base;
 	int shift, enable, m_io, n_io;
 
@@ -867,8 +913,8 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode)
 	}
 
 	if (is_uv2_hub()) {
-		mmr	= UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
-		shift	= UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+		mmr	= UVH_RH_GAM_MMIOH_OVERLAY_CONFIG;
+		shift	= UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT;
 		mmioh.v	= uv_read_local_mmr(mmr);
 		enable	= !!mmioh.s2.enable;
 		base	= mmioh.s2.base;
@@ -950,13 +996,13 @@ struct mn {
 
 static void get_mn(struct mn *mnp)
 {
-	union uvh_rh_gam_config_mmr_u m_n_config;
-	union uv3h_gr0_gam_gr_config_u m_gr_config;
+	union uvh_rh_gam_addr_map_config_u m_n_config;
+	union uvyh_gr0_gam_gr_config_u m_gr_config;
 
 	/* Make sure the whole structure is well initialized: */
 	memset(mnp, 0, sizeof(*mnp));
 
-	m_n_config.v	= uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR);
+	m_n_config.v	= uv_read_local_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG);
 	mnp->n_val	= m_n_config.s.n_skt;
 
 	if (is_uv4_hub()) {
@@ -964,7 +1010,7 @@ static void get_mn(struct mn *mnp)
 		mnp->n_lshift	= 0;
 	} else if (is_uv3_hub()) {
 		mnp->m_val	= m_n_config.s3.m_skt;
-		m_gr_config.v	= uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
+		m_gr_config.v	= uv_read_local_mmr(UVH_GR0_GAM_GR_CONFIG);
 		mnp->n_lshift	= m_gr_config.s3.m_skt;
 	} else if (is_uv2_hub()) {
 		mnp->m_val	= m_n_config.s2.m_skt;
@@ -975,7 +1021,6 @@ static void get_mn(struct mn *mnp)
 
 static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
 {
-	union uvh_node_id_u node_id;
 	struct mn mn;
 
 	get_mn(&mn);
@@ -988,6 +1033,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
 	hi->m_shift		= mn.m_shift;
 	hi->n_lshift		= mn.n_lshift ? mn.n_lshift : 0;
 	hi->hub_revision	= uv_hub_info->hub_revision;
+	hi->hub_type		= uv_hub_info->hub_type;
 	hi->pnode_mask		= uv_cpuid.pnode_mask;
 	hi->min_pnode		= _min_pnode;
 	hi->min_socket		= _min_socket;
@@ -997,9 +1043,8 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
 	hi->gr_table_len	= _gr_table_len;
 	hi->gr_table		= _gr_table;
 
-	node_id.v		= uv_read_local_mmr(UVH_NODE_ID);
 	uv_cpuid.gnode_shift	= max_t(unsigned int, uv_cpuid.gnode_shift, mn.n_val);
-	hi->gnode_extra		= (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1;
+	hi->gnode_extra		= (uv_node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1;
 	if (mn.m_val)
 		hi->gnode_upper	= (u64)hi->gnode_extra << mn.m_val;
 
@@ -1011,7 +1056,9 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
 		hi->gpa_shift		= uv_gp_table->gpa_shift;
 		hi->gpa_mask		= (1UL << hi->gpa_shift) - 1;
 	} else {
-		hi->global_mmr_base	= uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE;
+		hi->global_mmr_base	=
+			uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG) &
+			~UV_MMR_ENABLE;
 		hi->global_mmr_shift	= _UV_GLOBAL_MMR64_PNODE_SHIFT;
 	}
 
@@ -1135,11 +1182,7 @@ static int __init decode_uv_systab(void)
 	return 0;
 }
 
-/*
- * Set up physical blade translations from UVH_NODE_PRESENT_TABLE
- * .. NB: UVH_NODE_PRESENT_TABLE is going away,
- * .. being replaced by GAM Range Table
- */
+/* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */
 static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
 {
 	int i, uv_pb = 0;
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 6c348c2d0def56..d9967350a1ab94 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -84,10 +84,8 @@ static void uv_rtc_send_IPI(int cpu)
 /* Check for an RTC interrupt pending */
 static int uv_intr_pending(int pnode)
 {
-	if (is_uvx_hub())
-		return uv_read_global_mmr64(pnode, UVXH_EVENT_OCCURRED2) &
-			UVXH_EVENT_OCCURRED2_RTC_1_MASK;
-	return 0;
+	return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED2) &
+		UVH_EVENT_OCCURRED2_RTC_1_MASK;
 }
 
 /* Setup interrupt and return non-zero if early expiration occurred. */
@@ -101,8 +99,8 @@ static int uv_setup_intr(int cpu, u64 expires)
 		UVH_RTC1_INT_CONFIG_M_MASK);
 	uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L);
 
-	uv_write_global_mmr64(pnode, UVXH_EVENT_OCCURRED2_ALIAS,
-			      UVXH_EVENT_OCCURRED2_RTC_1_MASK);
+	uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED2_ALIAS,
+			      UVH_EVENT_OCCURRED2_RTC_1_MASK);
 
 	val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) |
 		((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT);
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 93bb49ddda1fe5..18aa8c877bf813 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -516,7 +516,7 @@ static int __init gru_init(void)
 #if defined CONFIG_IA64
 	gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */
 #else
-	gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR) &
+	gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG) &
 				0x7fffffffffffUL;
 #endif
 	gru_start_vaddr = __va(gru_start_paddr);

From 6c7794423a998478f6df0234d2dd5baa3ccbdb1d Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:21 -0500
Subject: [PATCH 242/262] x86/platform/uv: Add UV5 direct references

Add new references to UV5 (and UVY class) system MMR addresses and
fields primarily caused by the expansion from 46 to 52 bits of physical
memory address.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-6-mike.travis@hpe.com
---
 arch/x86/include/asm/uv/uv_hub.h   | 49 ++++++++++-----
 arch/x86/kernel/apic/x2apic_uv_x.c | 97 +++++++++++++++++++++---------
 2 files changed, 103 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 76969be0966021..ecf5c93e7ae807 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -160,6 +160,7 @@ struct uv_hub_info_s {
 	unsigned char		gr_table_len;
 	unsigned char		apic_pnode_shift;
 	unsigned char		gpa_shift;
+	unsigned char		nasid_shift;
 	unsigned char		m_shift;
 	unsigned char		n_lshift;
 	unsigned int		gnode_extra;
@@ -226,6 +227,7 @@ static inline __init void uv_hub_type_set(int uvmask)
 #define UV3_HUB_REVISION_BASE		5
 #define UV4_HUB_REVISION_BASE		7
 #define UV4A_HUB_REVISION_BASE		8	/* UV4 (fixed) rev 2 */
+#define UV5_HUB_REVISION_BASE		9
 
 static inline int is_uv(int uvmask) { return uv_hub_type() & uvmask; }
 static inline int is_uv1_hub(void) { return 0; }
@@ -233,7 +235,7 @@ static inline int is_uv2_hub(void) { return is_uv(UV2); }
 static inline int is_uv3_hub(void) { return is_uv(UV3); }
 static inline int is_uv4a_hub(void) { return is_uv(UV4A); }
 static inline int is_uv4_hub(void) { return is_uv(UV4); }
-static inline int is_uv5_hub(void) { return 0; }
+static inline int is_uv5_hub(void) { return is_uv(UV5); }
 
 /*
  * UV4A is a revision of UV4.  So on UV4A, both is_uv4_hub() and
@@ -246,7 +248,7 @@ static inline int is_uv5_hub(void) { return 0; }
 static inline int is_uvx_hub(void) { return is_uv(UVX); }
 
 /* UVY class: UV5,..? */
-static inline int is_uvy_hub(void) { return 0; }
+static inline int is_uvy_hub(void) { return is_uv(UVY); }
 
 /* Any UV Hubbed System */
 static inline int is_uv_hub(void) { return is_uv(UV_ANY); }
@@ -271,9 +273,11 @@ union uvh_apicid {
  *		g -  GNODE (full 15-bit global nasid, right shifted 1)
  *		p -  PNODE (local part of nsids, right shifted 1)
  */
-#define UV_NASID_TO_PNODE(n)		(((n) >> 1) & uv_hub_info->pnode_mask)
+#define UV_NASID_TO_PNODE(n)		\
+		(((n) >> uv_hub_info->nasid_shift) & uv_hub_info->pnode_mask)
 #define UV_PNODE_TO_GNODE(p)		((p) |uv_hub_info->gnode_extra)
-#define UV_PNODE_TO_NASID(p)		(UV_PNODE_TO_GNODE(p) << 1)
+#define UV_PNODE_TO_NASID(p)		\
+		(UV_PNODE_TO_GNODE(p) << uv_hub_info->nasid_shift)
 
 #define UV2_LOCAL_MMR_BASE		0xfa000000UL
 #define UV2_GLOBAL_MMR32_BASE		0xfc000000UL
@@ -290,25 +294,38 @@ union uvh_apicid {
 #define UV4_LOCAL_MMR_SIZE		(32UL * 1024 * 1024)
 #define UV4_GLOBAL_MMR32_SIZE		0
 
+#define UV5_LOCAL_MMR_BASE		0xfa000000UL
+#define UV5_GLOBAL_MMR32_BASE		0
+#define UV5_LOCAL_MMR_SIZE		(32UL * 1024 * 1024)
+#define UV5_GLOBAL_MMR32_SIZE		0
+
 #define UV_LOCAL_MMR_BASE		(				\
-					is_uv2_hub() ? UV2_LOCAL_MMR_BASE : \
-					is_uv3_hub() ? UV3_LOCAL_MMR_BASE : \
-					/*is_uv4_hub*/ UV4_LOCAL_MMR_BASE)
+					is_uv(UV2) ? UV2_LOCAL_MMR_BASE : \
+					is_uv(UV3) ? UV3_LOCAL_MMR_BASE : \
+					is_uv(UV4) ? UV4_LOCAL_MMR_BASE : \
+					is_uv(UV5) ? UV5_LOCAL_MMR_BASE : \
+					0)
 
 #define UV_GLOBAL_MMR32_BASE		(				\
-					is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE : \
-					is_uv3_hub() ? UV3_GLOBAL_MMR32_BASE : \
-					/*is_uv4_hub*/ UV4_GLOBAL_MMR32_BASE)
+					is_uv(UV2) ? UV2_GLOBAL_MMR32_BASE : \
+					is_uv(UV3) ? UV3_GLOBAL_MMR32_BASE : \
+					is_uv(UV4) ? UV4_GLOBAL_MMR32_BASE : \
+					is_uv(UV5) ? UV5_GLOBAL_MMR32_BASE : \
+					0)
 
 #define UV_LOCAL_MMR_SIZE		(				\
-					is_uv2_hub() ? UV2_LOCAL_MMR_SIZE : \
-					is_uv3_hub() ? UV3_LOCAL_MMR_SIZE : \
-					/*is_uv4_hub*/ UV4_LOCAL_MMR_SIZE)
+					is_uv(UV2) ? UV2_LOCAL_MMR_SIZE : \
+					is_uv(UV3) ? UV3_LOCAL_MMR_SIZE : \
+					is_uv(UV4) ? UV4_LOCAL_MMR_SIZE : \
+					is_uv(UV5) ? UV5_LOCAL_MMR_SIZE : \
+					0)
 
 #define UV_GLOBAL_MMR32_SIZE		(				\
-					is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE : \
-					is_uv3_hub() ? UV3_GLOBAL_MMR32_SIZE : \
-					/*is_uv4_hub*/ UV4_GLOBAL_MMR32_SIZE)
+					is_uv(UV2) ? UV2_GLOBAL_MMR32_SIZE : \
+					is_uv(UV3) ? UV3_GLOBAL_MMR32_SIZE : \
+					is_uv(UV4) ? UV4_GLOBAL_MMR32_SIZE : \
+					is_uv(UV5) ? UV5_GLOBAL_MMR32_SIZE : \
+					0)
 
 #define UV_GLOBAL_MMR64_BASE		(uv_hub_info->global_mmr_base)
 
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index d357711072a186..fca5f94d055e7d 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -35,14 +35,17 @@ static int			uv_node_id;
 static u8 oem_id[ACPI_OEM_ID_SIZE + 1];
 static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
 
-/* Information derived from CPUID: */
+/* Information derived from CPUID and some UV MMRs */
 static struct {
 	unsigned int apicid_shift;
 	unsigned int apicid_mask;
 	unsigned int socketid_shift;	/* aka pnode_shift for UV2/3 */
 	unsigned int pnode_mask;
+	unsigned int nasid_shift;
 	unsigned int gpa_shift;
 	unsigned int gnode_shift;
+	unsigned int m_skt;
+	unsigned int n_skt;
 } uv_cpuid;
 
 static int uv_min_hub_revision_id;
@@ -88,20 +91,43 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
 
 static void __init early_get_pnodeid(void)
 {
-	union uvh_rh_gam_addr_map_config_u  m_n_config;
 	int pnode;
 
+	uv_cpuid.m_skt = 0;
+	if (UVH_RH10_GAM_ADDR_MAP_CONFIG) {
+		union uvh_rh10_gam_addr_map_config_u  m_n_config;
+
+		m_n_config.v = uv_early_read_mmr(UVH_RH10_GAM_ADDR_MAP_CONFIG);
+		uv_cpuid.n_skt = m_n_config.s.n_skt;
+		uv_cpuid.nasid_shift = 0;
+	} else if (UVH_RH_GAM_ADDR_MAP_CONFIG) {
+		union uvh_rh_gam_addr_map_config_u  m_n_config;
+
 	m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG);
+		uv_cpuid.n_skt = m_n_config.s.n_skt;
+		if (is_uv(UV3))
+			uv_cpuid.m_skt = m_n_config.s3.m_skt;
+		if (is_uv(UV2))
+			uv_cpuid.m_skt = m_n_config.s2.m_skt;
+		uv_cpuid.nasid_shift = 1;
+	} else {
+		unsigned long GAM_ADDR_MAP_CONFIG = 0;
+
+		WARN(GAM_ADDR_MAP_CONFIG == 0,
+			"UV: WARN: GAM_ADDR_MAP_CONFIG is not available\n");
+		uv_cpuid.n_skt = 0;
+		uv_cpuid.nasid_shift = 0;
+	}
 
-	if (is_uv4_hub())
+	if (is_uv(UV4|UVY))
 		uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */
 
-	uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1;
-	pnode = (uv_node_id >> 1) & uv_cpuid.pnode_mask;
+	uv_cpuid.pnode_mask = (1 << uv_cpuid.n_skt) - 1;
+	pnode = (uv_node_id >> uv_cpuid.nasid_shift) & uv_cpuid.pnode_mask;
 	uv_cpuid.gpa_shift = 46;	/* Default unless changed */
 
 	pr_info("UV: n_skt:%d pnmsk:%x pn:%x\n",
-		m_n_config.s.n_skt, uv_cpuid.pnode_mask, pnode);
+		uv_cpuid.n_skt, uv_cpuid.pnode_mask, pnode);
 }
 
 /* Running on a UV Hubbed system, determine which UV Hub Type it is */
@@ -121,6 +147,12 @@ static int __init early_set_hub_type(void)
 
 	switch (node_id.s.part_number) {
 
+	case UV5_HUB_PART_NUMBER:
+		uv_min_hub_revision_id = node_id.s.revision
+					 + UV5_HUB_REVISION_BASE;
+		uv_hub_type_set(UV5);
+		break;
+
 	/* UV4/4A only have a revision difference */
 	case UV4_HUB_PART_NUMBER:
 		uv_min_hub_revision_id = node_id.s.revision
@@ -282,11 +314,17 @@ static int __init uv_set_system_type(char *_oem_id)
 
 	/* Set hubbed type if true */
 	uv_hub_info->hub_revision =
+		!strncmp(oem_id, "SGI5", 4) ? UV5_HUB_REVISION_BASE :
 		!strncmp(oem_id, "SGI4", 4) ? UV4_HUB_REVISION_BASE :
 		!strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
 		!strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE : 0;
 
 	switch (uv_hub_info->hub_revision) {
+	case UV5_HUB_REVISION_BASE:
+		uv_hubbed_system = 0x21;
+		uv_hub_type_set(UV5);
+		break;
+
 	case UV4_HUB_REVISION_BASE:
 		uv_hubbed_system = 0x11;
 		uv_hub_type_set(UV4);
@@ -923,7 +961,8 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode)
 
 		if (enable) {
 			max_pnode &= (1 << n_io) - 1;
-			pr_info("UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
+			pr_info(
+			"UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
 				base, shift, m_io, n_io, max_pnode);
 			map_high("MMIOH", base, shift, m_io, max_pnode, map_uc);
 		} else {
@@ -934,8 +973,11 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode)
 
 static __init void map_low_mmrs(void)
 {
-	init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
-	init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+	if (UV_GLOBAL_MMR32_BASE)
+		init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+
+	if (UV_LOCAL_MMR_BASE)
+		init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
 }
 
 static __init void uv_rtc_init(void)
@@ -994,26 +1036,22 @@ struct mn {
 	unsigned char	n_lshift;
 };
 
+/* Initialize caller's MN struct and fill in values */
 static void get_mn(struct mn *mnp)
 {
-	union uvh_rh_gam_addr_map_config_u m_n_config;
-	union uvyh_gr0_gam_gr_config_u m_gr_config;
-
-	/* Make sure the whole structure is well initialized: */
 	memset(mnp, 0, sizeof(*mnp));
-
-	m_n_config.v	= uv_read_local_mmr(UVH_RH_GAM_ADDR_MAP_CONFIG);
-	mnp->n_val	= m_n_config.s.n_skt;
-
-	if (is_uv4_hub()) {
+	mnp->n_val	= uv_cpuid.n_skt;
+	if (is_uv(UV4|UVY)) {
 		mnp->m_val	= 0;
 		mnp->n_lshift	= 0;
 	} else if (is_uv3_hub()) {
-		mnp->m_val	= m_n_config.s3.m_skt;
+		union uvyh_gr0_gam_gr_config_u m_gr_config;
+
+		mnp->m_val	= uv_cpuid.m_skt;
 		m_gr_config.v	= uv_read_local_mmr(UVH_GR0_GAM_GR_CONFIG);
 		mnp->n_lshift	= m_gr_config.s3.m_skt;
 	} else if (is_uv2_hub()) {
-		mnp->m_val	= m_n_config.s2.m_skt;
+		mnp->m_val	= uv_cpuid.m_skt;
 		mnp->n_lshift	= mnp->m_val == 40 ? 40 : 39;
 	}
 	mnp->m_shift = mnp->m_val ? 64 - mnp->m_val : 0;
@@ -1035,6 +1073,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
 	hi->hub_revision	= uv_hub_info->hub_revision;
 	hi->hub_type		= uv_hub_info->hub_type;
 	hi->pnode_mask		= uv_cpuid.pnode_mask;
+	hi->nasid_shift		= uv_cpuid.nasid_shift;
 	hi->min_pnode		= _min_pnode;
 	hi->min_socket		= _min_socket;
 	hi->pnode_to_socket	= _pnode_to_socket;
@@ -1146,16 +1185,19 @@ static int __init decode_uv_systab(void)
 	struct uv_systab *st;
 	int i;
 
-	/* If system is uv3 or lower, there is no extended UVsystab */
-	if (is_uv_hubbed(0xfffffe) < uv(4) && is_uv_hubless(0xfffffe) < uv(4))
-		return 0;	/* No extended UVsystab required */
-
+	/* Get mapped UVsystab pointer */
 	st = uv_systab;
+
+	/* If UVsystab is version 1, there is no extended UVsystab */
+	if (st && st->revision == UV_SYSTAB_VERSION_1)
+		return 0;
+
 	if ((!st) || (st->revision < UV_SYSTAB_VERSION_UV4_LATEST)) {
 		int rev = st ? st->revision : 0;
 
-		pr_err("UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n", rev, UV_SYSTAB_VERSION_UV4_LATEST);
-		pr_err("UV: Cannot support UV operations, switching to generic PC\n");
+		pr_err("UV: BIOS UVsystab mismatch, (%x < %x)\n",
+			rev, UV_SYSTAB_VERSION_UV4_LATEST);
+		pr_err("UV: Does not support UV, switch to non-UV x86_64\n");
 		uv_system_type = UV_NONE;
 
 		return -EINVAL;
@@ -1393,7 +1435,8 @@ static void __init uv_system_init_hub(void)
 	struct uv_hub_info_s hub_info = {0};
 	int bytes, cpu, nodeid;
 	unsigned short min_pnode = 9999, max_pnode = 0;
-	char *hub = is_uv4_hub() ? "UV400" :
+	char *hub = is_uv5_hub() ? "UV500" :
+		    is_uv4_hub() ? "UV400" :
 		    is_uv3_hub() ? "UV300" :
 		    is_uv2_hub() ? "UV2000/3000" : NULL;
 

From 1e61f5a95f1913c015a2d6a1544c108248b3971c Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:22 -0500
Subject: [PATCH 243/262] x86/platform/uv: Add and decode Arch Type in UVsystab

When the UV BIOS starts the kernel it passes the UVsystab info struct to
the kernel which contains information elements more specific than ACPI,
and generally pertinent only to the MMRs. These are read only fields
so information is passed one way only. A new field starting with UV5 is
the UV architecture type so the ACPI OEM_ID field can be used for other
purposes going forward. The UV Arch Type selects the entirety of the
MMRs available, with their addresses and fields defined in uv_mmrs.h.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-7-mike.travis@hpe.com
---
 arch/x86/include/asm/uv/bios.h     |  16 +++-
 arch/x86/kernel/apic/x2apic_uv_x.c | 135 +++++++++++++++++++++++++----
 arch/x86/platform/uv/bios_uv.c     |  27 ++++--
 3 files changed, 148 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 70050d0136c3f6..97ac595ebc6a37 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -5,8 +5,8 @@
 /*
  * UV BIOS layer definitions.
  *
- *  Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
- *  Copyright (c) Russ Anderson <rja@sgi.com>
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Russ Anderson <rja@sgi.com>
  */
 
 #include <linux/rtc.h>
@@ -71,6 +71,11 @@ struct uv_gam_range_entry {
 	u32	limit;		/* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
 };
 
+#define	UV_AT_SIZE	8	/* 7 character arch type + NULL char */
+struct uv_arch_type_entry {
+	char	archtype[UV_AT_SIZE];
+};
+
 #define	UV_SYSTAB_SIG			"UVST"
 #define	UV_SYSTAB_VERSION_1		1	/* UV2/3 BIOS version */
 #define	UV_SYSTAB_VERSION_UV4		0x400	/* UV4 BIOS base version */
@@ -79,10 +84,14 @@ struct uv_gam_range_entry {
 #define	UV_SYSTAB_VERSION_UV4_3		0x403	/* - GAM Range PXM Value */
 #define	UV_SYSTAB_VERSION_UV4_LATEST	UV_SYSTAB_VERSION_UV4_3
 
+#define	UV_SYSTAB_VERSION_UV5		0x500	/* UV5 GAM base version */
+#define	UV_SYSTAB_VERSION_UV5_LATEST	UV_SYSTAB_VERSION_UV5
+
 #define	UV_SYSTAB_TYPE_UNUSED		0	/* End of table (offset == 0) */
 #define	UV_SYSTAB_TYPE_GAM_PARAMS	1	/* GAM PARAM conversions */
 #define	UV_SYSTAB_TYPE_GAM_RNG_TBL	2	/* GAM entry table */
-#define	UV_SYSTAB_TYPE_MAX		3
+#define	UV_SYSTAB_TYPE_ARCH_TYPE	3	/* UV arch type */
+#define	UV_SYSTAB_TYPE_MAX		4
 
 /*
  * The UV system table describes specific firmware
@@ -133,6 +142,7 @@ extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
 extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
 
 extern int uv_bios_init(void);
+extern unsigned long get_uv_systab_phys(bool msg);
 
 extern unsigned long sn_rtc_cycles_per_second;
 extern int uv_type;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index fca5f94d055e7d..9b7a334578e614 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -31,7 +31,8 @@ static u64			gru_start_paddr, gru_end_paddr;
 static union uvh_apicid		uvh_apicid;
 static int			uv_node_id;
 
-/* Unpack OEM/TABLE ID's to be NULL terminated strings */
+/* Unpack AT/OEM/TABLE ID's to be NULL terminated strings */
+static u8 uv_archtype[UV_AT_SIZE];
 static u8 oem_id[ACPI_OEM_ID_SIZE + 1];
 static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
 
@@ -284,18 +285,102 @@ static void __init uv_stringify(int len, char *to, char *from)
 	strncpy(to, from, len-1);
 }
 
+/* Find UV arch type entry in UVsystab */
+static unsigned long __init early_find_archtype(struct uv_systab *st)
+{
+	int i;
+
+	for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
+		unsigned long ptr = st->entry[i].offset;
+
+		if (!ptr)
+			continue;
+		ptr += (unsigned long)st;
+		if (st->entry[i].type == UV_SYSTAB_TYPE_ARCH_TYPE)
+			return ptr;
+	}
+	return 0;
+}
+
+/* Validate UV arch type field in UVsystab */
+static int __init decode_arch_type(unsigned long ptr)
+{
+	struct uv_arch_type_entry *uv_ate = (struct uv_arch_type_entry *)ptr;
+	int n = strlen(uv_ate->archtype);
+
+	if (n > 0 && n < sizeof(uv_ate->archtype)) {
+		pr_info("UV: UVarchtype received from BIOS\n");
+		uv_stringify(UV_AT_SIZE, uv_archtype, uv_ate->archtype);
+		return 1;
+	}
+	return 0;
+}
+
+/* Determine if UV arch type entry might exist in UVsystab */
+static int __init early_get_arch_type(void)
+{
+	unsigned long uvst_physaddr, uvst_size, ptr;
+	struct uv_systab *st;
+	u32 rev;
+	int ret;
+
+	uvst_physaddr = get_uv_systab_phys(0);
+	if (!uvst_physaddr)
+		return 0;
+
+	st = early_memremap_ro(uvst_physaddr, sizeof(struct uv_systab));
+	if (!st) {
+		pr_err("UV: Cannot access UVsystab, remap failed\n");
+		return 0;
+	}
+
+	rev = st->revision;
+	if (rev < UV_SYSTAB_VERSION_UV5) {
+		early_memunmap(st, sizeof(struct uv_systab));
+		return 0;
+	}
+
+	uvst_size = st->size;
+	early_memunmap(st, sizeof(struct uv_systab));
+	st = early_memremap_ro(uvst_physaddr, uvst_size);
+	if (!st) {
+		pr_err("UV: Cannot access UVarchtype, remap failed\n");
+		return 0;
+	}
+
+	ptr = early_find_archtype(st);
+	if (!ptr) {
+		early_memunmap(st, uvst_size);
+		return 0;
+	}
+
+	ret = decode_arch_type(ptr);
+	early_memunmap(st, uvst_size);
+	return ret;
+}
+
 static int __init uv_set_system_type(char *_oem_id)
 {
-	/* Save OEM ID */
+	/* Save OEM_ID passed from ACPI MADT */
 	uv_stringify(sizeof(oem_id), oem_id, _oem_id);
 
-	/* Set hubless type if true */
-	if (strncmp(oem_id, "SGI", 3) != 0) {
-		if (strncmp(oem_id, "NSGI", 4) != 0)
+	/* Check if BIOS sent us a UVarchtype */
+	if (!early_get_arch_type())
+
+		/* If not use OEM ID for UVarchtype */
+		uv_stringify(UV_AT_SIZE, uv_archtype, _oem_id);
+
+	/* Check if not hubbed */
+	if (strncmp(uv_archtype, "SGI", 3) != 0) {
+
+		/* (Not hubbed), check if not hubless */
+		if (strncmp(uv_archtype, "NSGI", 4) != 0)
+
+			/* (Not hubless), not a UV */
 			return 0;
 
 		/* UV4 Hubless: CH */
-		if (strncmp(oem_id, "NSGI4", 5) == 0)
+		if (strncmp(uv_archtype, "NSGI4", 5) == 0)
 			uv_hubless_system = 0x11;
 
 		/* UV3 Hubless: UV300/MC990X w/o hub */
@@ -314,10 +399,10 @@ static int __init uv_set_system_type(char *_oem_id)
 
 	/* Set hubbed type if true */
 	uv_hub_info->hub_revision =
-		!strncmp(oem_id, "SGI5", 4) ? UV5_HUB_REVISION_BASE :
-		!strncmp(oem_id, "SGI4", 4) ? UV4_HUB_REVISION_BASE :
-		!strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
-		!strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE : 0;
+		!strncmp(uv_archtype, "SGI5", 4) ? UV5_HUB_REVISION_BASE :
+		!strncmp(uv_archtype, "SGI4", 4) ? UV4_HUB_REVISION_BASE :
+		!strncmp(uv_archtype, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
+		!strcmp(uv_archtype, "SGI2") ? UV2_HUB_REVISION_BASE : 0;
 
 	switch (uv_hub_info->hub_revision) {
 	case UV5_HUB_REVISION_BASE:
@@ -388,8 +473,7 @@ static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
 	return 0;
 
 badbios:
-	pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id);
-	pr_err("UV: Current UV Type or BIOS not supported\n");
+	pr_err("UV: UVarchtype:%s not supported\n", uv_archtype);
 	BUG();
 }
 
@@ -1180,6 +1264,7 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
 	pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", index, _min_socket, _max_socket, _min_pnode, _max_pnode);
 }
 
+/* Walk through UVsystab decoding the fields */
 static int __init decode_uv_systab(void)
 {
 	struct uv_systab *st;
@@ -1209,7 +1294,8 @@ static int __init decode_uv_systab(void)
 		if (!ptr)
 			continue;
 
-		ptr = ptr + (unsigned long)st;
+		/* point to payload */
+		ptr += (unsigned long)st;
 
 		switch (st->entry[i].type) {
 		case UV_SYSTAB_TYPE_GAM_PARAMS:
@@ -1219,6 +1305,15 @@ static int __init decode_uv_systab(void)
 		case UV_SYSTAB_TYPE_GAM_RNG_TBL:
 			decode_gam_rng_tbl(ptr);
 			break;
+
+		case UV_SYSTAB_TYPE_ARCH_TYPE:
+			/* already processed in early startup */
+			break;
+
+		default:
+			pr_err("UV:%s:Unrecognized UV_SYSTAB_TYPE:%d, skipped\n",
+				__func__, st->entry[i].type);
+			break;
 		}
 	}
 	return 0;
@@ -1259,7 +1354,7 @@ static void __init build_socket_tables(void)
 			pr_info("UV: No UVsystab socket table, ignoring\n");
 			return;
 		}
-		pr_crit("UV: Error: UVsystab address translations not available!\n");
+		pr_err("UV: Error: UVsystab address translations not available!\n");
 		BUG();
 	}
 
@@ -1385,9 +1480,9 @@ static int __maybe_unused proc_hubless_show(struct seq_file *file, void *data)
 	return 0;
 }
 
-static int __maybe_unused proc_oemid_show(struct seq_file *file, void *data)
+static int __maybe_unused proc_archtype_show(struct seq_file *file, void *data)
 {
-	seq_printf(file, "%s/%s\n", oem_id, oem_table_id);
+	seq_printf(file, "%s/%s\n", uv_archtype, oem_table_id);
 	return 0;
 }
 
@@ -1396,7 +1491,7 @@ static __init void uv_setup_proc_files(int hubless)
 	struct proc_dir_entry *pde;
 
 	pde = proc_mkdir(UV_PROC_NODE, NULL);
-	proc_create_single("oemid", 0, pde, proc_oemid_show);
+	proc_create_single("archtype", 0, pde, proc_archtype_show);
 	if (hubless)
 		proc_create_single("hubless", 0, pde, proc_hubless_show);
 	else
@@ -1448,12 +1543,14 @@ static void __init uv_system_init_hub(void)
 
 	map_low_mmrs();
 
-	/* Get uv_systab for decoding: */
+	/* Get uv_systab for decoding, setup UV BIOS calls */
 	uv_bios_init();
 
 	/* If there's an UVsystab problem then abort UV init: */
-	if (decode_uv_systab() < 0)
+	if (decode_uv_systab() < 0) {
+		pr_err("UV: Mangled UVsystab format\n");
 		return;
+	}
 
 	build_socket_tables();
 	build_uv_gr_table();
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index a2f447dffea6c8..b148b4c8c2ec32 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -2,8 +2,8 @@
 /*
  * BIOS run time interface routines.
  *
- *  Copyright (c) 2008-2009 Silicon Graphics, Inc.  All Rights Reserved.
- *  Copyright (c) Russ Anderson <rja@sgi.com>
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Russ Anderson <rja@sgi.com>
  */
 
 #include <linux/efi.h>
@@ -170,16 +170,27 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
 				(u64)decode, (u64)domain, (u64)bus, 0, 0);
 }
 
-int uv_bios_init(void)
+unsigned long get_uv_systab_phys(bool msg)
 {
-	uv_systab = NULL;
 	if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) ||
 	    !uv_systab_phys || efi_runtime_disabled()) {
-		pr_crit("UV: UVsystab: missing\n");
-		return -EEXIST;
+		if (msg)
+			pr_crit("UV: UVsystab: missing\n");
+		return 0;
 	}
+	return uv_systab_phys;
+}
+
+int uv_bios_init(void)
+{
+	unsigned long uv_systab_phys_addr;
+
+	uv_systab = NULL;
+	uv_systab_phys_addr = get_uv_systab_phys(1);
+	if (!uv_systab_phys_addr)
+		return -EEXIST;
 
-	uv_systab = ioremap(uv_systab_phys, sizeof(struct uv_systab));
+	uv_systab = ioremap(uv_systab_phys_addr, sizeof(struct uv_systab));
 	if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) {
 		pr_err("UV: UVsystab: bad signature!\n");
 		iounmap(uv_systab);
@@ -191,7 +202,7 @@ int uv_bios_init(void)
 		int size = uv_systab->size;
 
 		iounmap(uv_systab);
-		uv_systab = ioremap(uv_systab_phys, size);
+		uv_systab = ioremap(uv_systab_phys_addr, size);
 		if (!uv_systab) {
 			pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
 			return -EFAULT;

From ffe2febca4304b9288e2d274d2ece5e66c125441 Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:23 -0500
Subject: [PATCH 244/262] x86/platform/uv: Update MMIOH references based on new
 UV5 MMRs

Make modifications to the MMIOH mappings to accommodate changes for UV5.

[ Fix W=1 build warnings. ]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-8-mike.travis@hpe.com
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 212 ++++++++++++++++++++---------
 1 file changed, 144 insertions(+), 68 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 9b7a334578e614..e2e866a13f5274 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -226,6 +226,13 @@ static void __init uv_tsc_check_sync(void)
 		mark_tsc_unstable("UV BIOS");
 }
 
+/* Selector for (4|4A|5) structs */
+#define uvxy_field(sname, field, undef) (	\
+	is_uv(UV4A) ? sname.s4a.field :		\
+	is_uv(UV4) ? sname.s4.field :		\
+	is_uv(UV3) ? sname.s3.field :		\
+	undef)
+
 /* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
 
 #define SMT_LEVEL			0	/* Leaf 0xb SMT level */
@@ -878,6 +885,7 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
 }
 
 enum map_type {map_wb, map_uc};
+static const char * const mt[] = { "WB", "UC" };
 
 static __init void map_high(char *id, unsigned long base, int pshift, int bshift, int max_pnode, enum map_type map_type)
 {
@@ -889,11 +897,13 @@ static __init void map_high(char *id, unsigned long base, int pshift, int bshift
 		pr_info("UV: Map %s_HI base address NULL\n", id);
 		return;
 	}
-	pr_debug("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
 	if (map_type == map_uc)
 		init_extra_mapping_uc(paddr, bytes);
 	else
 		init_extra_mapping_wb(paddr, bytes);
+
+	pr_info("UV: Map %s_HI 0x%lx - 0x%lx %s (%d segments)\n",
+		id, paddr, paddr + bytes, mt[map_type], max_pnode + 1);
 }
 
 static __init void map_gru_high(int max_pnode)
@@ -927,52 +937,74 @@ static __init void map_mmr_high(int max_pnode)
 		pr_info("UV: MMR disabled\n");
 }
 
-/* UV3/4 have identical MMIOH overlay configs, UV4A is slightly different */
-static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
-{
-	unsigned long overlay;
-	unsigned long mmr;
-	unsigned long base;
-	unsigned long nasid_mask;
-	unsigned long m_overlay;
-	int i, n, shift, m_io, max_io;
-	int nasid, lnasid, fi, li;
-	char *id;
-
-	if (index == 0) {
-		id = "MMIOH0";
-		m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0;
-		overlay = uv_read_local_mmr(m_overlay);
-		base = overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
+/* Arch specific ENUM cases */
+enum mmioh_arch {
+	UV2_MMIOH = -1,
+	UVY_MMIOH0, UVY_MMIOH1,
+	UVX_MMIOH0, UVX_MMIOH1,
+};
+
+/* Calculate and Map MMIOH Regions */
+static void __init calc_mmioh_map(enum mmioh_arch index,
+	int min_pnode, int max_pnode,
+	int shift, unsigned long base, int m_io, int n_io)
+{
+	unsigned long mmr, nasid_mask;
+	int nasid, min_nasid, max_nasid, lnasid, mapped;
+	int i, fi, li, n, max_io;
+	char id[8];
+
+	/* One (UV2) mapping */
+	if (index == UV2_MMIOH) {
+		strncpy(id, "MMIOH", sizeof(id));
+		max_io = max_pnode;
+		mapped = 0;
+		goto map_exit;
+	}
+
+	/* small and large MMIOH mappings */
+	switch (index) {
+	case UVY_MMIOH0:
+		mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0;
+		nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
+		n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
+		min_nasid = min_pnode;
+		max_nasid = max_pnode;
+		mapped = 1;
+		break;
+	case UVY_MMIOH1:
+		mmr = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1;
+		nasid_mask = UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK;
+		n = UVH_RH10_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;
+		min_nasid = min_pnode;
+		max_nasid = max_pnode;
+		mapped = 1;
+		break;
+	case UVX_MMIOH0:
 		mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0;
-		m_io = (overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_MASK)
-			>> UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT;
-		shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_M_IO_SHFT;
+		nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_MASK;
 		n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG0_DEPTH;
-		nasid_mask = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_NASID_MASK;
-	} else {
-		id = "MMIOH1";
-		m_overlay = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1;
-		overlay = uv_read_local_mmr(m_overlay);
-		base = overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK;
+		min_nasid = min_pnode * 2;
+		max_nasid = max_pnode * 2;
+		mapped = 1;
+		break;
+	case UVX_MMIOH1:
 		mmr = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1;
-		m_io = (overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_MASK)
-			>> UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT;
-		shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_M_IO_SHFT;
+		nasid_mask = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_MASK;
 		n = UVH_RH_GAM_MMIOH_REDIRECT_CONFIG1_DEPTH;
-		nasid_mask = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_NASID_MASK;
-	}
-	pr_info("UV: %s overlay 0x%lx base:0x%lx m_io:%d\n", id, overlay, base, m_io);
-	if (!(overlay & UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_ENABLE_MASK)) {
-		pr_info("UV: %s disabled\n", id);
+		min_nasid = min_pnode * 2;
+		max_nasid = max_pnode * 2;
+		mapped = 1;
+		break;
+	default:
+		pr_err("UV:%s:Invalid mapping type:%d\n", __func__, index);
 		return;
 	}
 
-	/* Convert to NASID: */
-	min_pnode *= 2;
-	max_pnode *= 2;
-	max_io = lnasid = fi = li = -1;
+	/* enum values chosen so (index mod 2) is MMIOH 0/1 (low/high) */
+	snprintf(id, sizeof(id), "MMIOH%d", index%2);
 
+	max_io = lnasid = fi = li = -1;
 	for (i = 0; i < n; i++) {
 		unsigned long m_redirect = mmr + i * 8;
 		unsigned long redirect = uv_read_local_mmr(m_redirect);
@@ -982,9 +1014,12 @@ static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
 			pr_info("UV: %s redirect base 0x%lx(@0x%lx) 0x%04x\n",
 				id, redirect, m_redirect, nasid);
 
-		/* Invalid NASID: */
-		if (nasid < min_pnode || max_pnode < nasid)
+		/* Invalid NASID check */
+		if (nasid < min_nasid || max_nasid < nasid) {
+			pr_err("UV:%s:Invalid NASID:%x (range:%x..%x)\n",
+				__func__, index, min_nasid, max_nasid);
 			nasid = -1;
+		}
 
 		if (nasid == lnasid) {
 			li = i;
@@ -1007,7 +1042,8 @@ static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
 			}
 			addr1 = (base << shift) + f * (1ULL << m_io);
 			addr2 = (base << shift) + (l + 1) * (1ULL << m_io);
-			pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n", id, fi, li, lnasid, addr1, addr2);
+			pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
+				id, fi, li, lnasid, addr1, addr2);
 			if (max_io < l)
 				max_io = l;
 		}
@@ -1015,43 +1051,83 @@ static __init void map_mmioh_high_uv34(int index, int min_pnode, int max_pnode)
 		lnasid = nasid;
 	}
 
-	pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n", id, base, shift, m_io, max_io);
+map_exit:
+	pr_info("UV: %s base:0x%lx shift:%d m_io:%d max_io:%d max_pnode:0x%x\n",
+		id, base, shift, m_io, max_io, max_pnode);
 
-	if (max_io >= 0)
+	if (max_io >= 0 && !mapped)
 		map_high(id, base, shift, m_io, max_io, map_uc);
 }
 
 static __init void map_mmioh_high(int min_pnode, int max_pnode)
 {
-	union uvh_rh_gam_mmioh_overlay_config_u mmioh;
-	unsigned long mmr, base;
-	int shift, enable, m_io, n_io;
+	/* UVY flavor */
+	if (UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0) {
+		union uvh_rh10_gam_mmioh_overlay_config0_u mmioh0;
+		union uvh_rh10_gam_mmioh_overlay_config1_u mmioh1;
+
+		mmioh0.v = uv_read_local_mmr(UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0);
+		if (unlikely(mmioh0.s.enable == 0))
+			pr_info("UV: MMIOH0 disabled\n");
+		else
+			calc_mmioh_map(UVY_MMIOH0, min_pnode, max_pnode,
+				UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT,
+				mmioh0.s.base, mmioh0.s.m_io, mmioh0.s.n_io);
 
-	if (is_uv3_hub() || is_uv4_hub()) {
-		/* Map both MMIOH regions: */
-		map_mmioh_high_uv34(0, min_pnode, max_pnode);
-		map_mmioh_high_uv34(1, min_pnode, max_pnode);
+		mmioh1.v = uv_read_local_mmr(UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1);
+		if (unlikely(mmioh1.s.enable == 0))
+			pr_info("UV: MMIOH1 disabled\n");
+		else
+			calc_mmioh_map(UVY_MMIOH1, min_pnode, max_pnode,
+				UVH_RH10_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT,
+				mmioh1.s.base, mmioh1.s.m_io, mmioh1.s.n_io);
 		return;
 	}
+	/* UVX flavor */
+	if (UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0) {
+		union uvh_rh_gam_mmioh_overlay_config0_u mmioh0;
+		union uvh_rh_gam_mmioh_overlay_config1_u mmioh1;
+
+		mmioh0.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0);
+		if (unlikely(mmioh0.s.enable == 0))
+			pr_info("UV: MMIOH0 disabled\n");
+		else {
+			unsigned long base = uvxy_field(mmioh0, base, 0);
+			int m_io = uvxy_field(mmioh0, m_io, 0);
+			int n_io = uvxy_field(mmioh0, n_io, 0);
+
+			calc_mmioh_map(UVX_MMIOH0, min_pnode, max_pnode,
+				UVH_RH_GAM_MMIOH_OVERLAY_CONFIG0_BASE_SHFT,
+				base, m_io, n_io);
+		}
 
-	if (is_uv2_hub()) {
-		mmr	= UVH_RH_GAM_MMIOH_OVERLAY_CONFIG;
-		shift	= UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT;
-		mmioh.v	= uv_read_local_mmr(mmr);
-		enable	= !!mmioh.s2.enable;
-		base	= mmioh.s2.base;
-		m_io	= mmioh.s2.m_io;
-		n_io	= mmioh.s2.n_io;
-
-		if (enable) {
-			max_pnode &= (1 << n_io) - 1;
-			pr_info(
-			"UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
-				base, shift, m_io, n_io, max_pnode);
-			map_high("MMIOH", base, shift, m_io, max_pnode, map_uc);
-		} else {
-			pr_info("UV: MMIOH disabled\n");
+		mmioh1.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1);
+		if (unlikely(mmioh1.s.enable == 0))
+			pr_info("UV: MMIOH1 disabled\n");
+		else {
+			unsigned long base = uvxy_field(mmioh1, base, 0);
+			int m_io = uvxy_field(mmioh1, m_io, 0);
+			int n_io = uvxy_field(mmioh1, n_io, 0);
+
+			calc_mmioh_map(UVX_MMIOH1, min_pnode, max_pnode,
+				UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_BASE_SHFT,
+				base, m_io, n_io);
 		}
+		return;
+	}
+
+	/* UV2 flavor */
+	if (UVH_RH_GAM_MMIOH_OVERLAY_CONFIG) {
+		union uvh_rh_gam_mmioh_overlay_config_u mmioh;
+
+		mmioh.v	= uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG);
+		if (unlikely(mmioh.s2.enable == 0))
+			pr_info("UV: MMIOH disabled\n");
+		else
+			calc_mmioh_map(UV2_MMIOH, min_pnode, max_pnode,
+				UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_BASE_SHFT,
+				mmioh.s2.base, mmioh.s2.m_io, mmioh.s2.n_io);
+		return;
 	}
 }
 

From 8540b2cf0de09b6d96b7dce56a16e26ab4fe8a9b Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:24 -0500
Subject: [PATCH 245/262] x86/platform/uv: Adjust GAM MMR references affected
 by UV5 updates

Make modifications to the GAM MMR mappings to accommodate changes for UV5.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-9-mike.travis@hpe.com
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index e2e866a13f5274..5aed07ffcd7812 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -927,12 +927,32 @@ static __init void map_gru_high(int max_pnode)
 
 static __init void map_mmr_high(int max_pnode)
 {
-	union uvh_rh_gam_mmr_overlay_config_u mmr;
-	int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
+	unsigned long base;
+	int shift;
+	bool enable;
+
+	if (UVH_RH10_GAM_MMR_OVERLAY_CONFIG) {
+		union uvh_rh10_gam_mmr_overlay_config_u mmr;
+
+		mmr.v = uv_read_local_mmr(UVH_RH10_GAM_MMR_OVERLAY_CONFIG);
+		enable = mmr.s.enable;
+		base = mmr.s.base;
+		shift = UVH_RH10_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
+	} else if (UVH_RH_GAM_MMR_OVERLAY_CONFIG) {
+		union uvh_rh_gam_mmr_overlay_config_u mmr;
+
+		mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG);
+		enable = mmr.s.enable;
+		base = mmr.s.base;
+		shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_BASE_SHFT;
+	} else {
+		pr_err("UV:%s:RH_GAM_MMR_OVERLAY_CONFIG MMR undefined?\n",
+			__func__);
+		return;
+	}
 
-	mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG);
-	if (mmr.s.enable)
-		map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
+	if (enable)
+		map_high("MMR", base, shift, shift, max_pnode, map_uc);
 	else
 		pr_info("UV: MMR disabled\n");
 }

From a74a7e992caf0745f548a63b263ac34c6a4a29dd Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:25 -0500
Subject: [PATCH 246/262] x86/platform/uv: Update UV5 MMR references in UV GRU

Make modifications to the GRU mappings to accommodate changes for UV5.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-10-mike.travis@hpe.com
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 5aed07ffcd7812..64a1c597f2134d 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -82,6 +82,9 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
 
 static inline bool is_GRU_range(u64 start, u64 end)
 {
+	if (!gru_start_paddr)
+		return false;
+
 	return start >= gru_start_paddr && end <= gru_end_paddr;
 }
 
@@ -909,13 +912,24 @@ static __init void map_high(char *id, unsigned long base, int pshift, int bshift
 static __init void map_gru_high(int max_pnode)
 {
 	union uvh_rh_gam_gru_overlay_config_u gru;
-	int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
-	unsigned long mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
-	unsigned long base;
+	unsigned long mask, base;
+	int shift;
+
+	if (UVH_RH_GAM_GRU_OVERLAY_CONFIG) {
+		gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG);
+		shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
+		mask = UVH_RH_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
+	} else if (UVH_RH10_GAM_GRU_OVERLAY_CONFIG) {
+		gru.v = uv_read_local_mmr(UVH_RH10_GAM_GRU_OVERLAY_CONFIG);
+		shift = UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_SHFT;
+		mask = UVH_RH10_GAM_GRU_OVERLAY_CONFIG_BASE_MASK;
+	} else {
+		pr_err("UV: GRU unavailable (no MMR)\n");
+		return;
+	}
 
-	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG);
 	if (!gru.s.enable) {
-		pr_info("UV: GRU disabled\n");
+		pr_info("UV: GRU disabled (by BIOS)\n");
 		return;
 	}
 
@@ -1288,7 +1302,11 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
 	/* Show system specific info: */
 	pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n", hi->n_val, hi->m_val, hi->m_shift, hi->n_lshift);
 	pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n", hi->gpa_mask, hi->gpa_shift, hi->pnode_mask, hi->apic_pnode_shift);
-	pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n", hi->global_mmr_base, hi->global_mmr_shift, hi->global_gru_base, hi->global_gru_shift);
+	pr_info("UV: mmr_base/shift:0x%lx/%ld\n", hi->global_mmr_base, hi->global_mmr_shift);
+	if (hi->global_gru_base)
+		pr_info("UV: gru_base/shift:0x%lx/%ld\n",
+			hi->global_gru_base, hi->global_gru_shift);
+
 	pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n", hi->gnode_upper, hi->gnode_extra);
 }
 

From d6922effe4f3d5c643c8c05d51a572d6db4c9cb3 Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:26 -0500
Subject: [PATCH 247/262] x86/platform/uv: Update node present counting

The changes in the UV5 arch shrunk the NODE PRESENT table to just 2x64
entries (128 total) so are in to 64 bit MMRs instead of a depth of 64
bits in an array.  Adjust references when counting up the nodes present.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-11-mike.travis@hpe.com
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 64a1c597f2134d..9b44f45704f796 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -1436,20 +1436,32 @@ static int __init decode_uv_systab(void)
 /* Set up physical blade translations from UVH_NODE_PRESENT_TABLE */
 static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
 {
+	unsigned long np;
 	int i, uv_pb = 0;
 
-	pr_info("UV: NODE_PRESENT_DEPTH = %d\n", UVH_NODE_PRESENT_TABLE_DEPTH);
-	for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
-		unsigned long np;
-
-		np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
-		if (np)
+	if (UVH_NODE_PRESENT_TABLE) {
+		pr_info("UV: NODE_PRESENT_DEPTH = %d\n",
+			UVH_NODE_PRESENT_TABLE_DEPTH);
+		for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
+			np = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
 			pr_info("UV: NODE_PRESENT(%d) = 0x%016lx\n", i, np);
-
+			uv_pb += hweight64(np);
+		}
+	}
+	if (UVH_NODE_PRESENT_0) {
+		np = uv_read_local_mmr(UVH_NODE_PRESENT_0);
+		pr_info("UV: NODE_PRESENT_0 = 0x%016lx\n", np);
+		uv_pb += hweight64(np);
+	}
+	if (UVH_NODE_PRESENT_1) {
+		np = uv_read_local_mmr(UVH_NODE_PRESENT_1);
+		pr_info("UV: NODE_PRESENT_1 = 0x%016lx\n", np);
 		uv_pb += hweight64(np);
 	}
 	if (uv_possible_blades != uv_pb)
 		uv_possible_blades = uv_pb;
+
+	pr_info("UV: number nodes/possible blades %d\n", uv_pb);
 }
 
 static void __init build_socket_tables(void)

From 6a7cf55e9f2b743695adac84375548aa18112327 Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:27 -0500
Subject: [PATCH 248/262] x86/platform/uv: Update UV5 TSC checking

Update check of BIOS TSC sync status to include both possible "invalid"
states provided by newer UV5 BIOS.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-12-mike.travis@hpe.com
---
 arch/x86/include/asm/uv/uv_hub.h   |  2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c | 24 ++++++++++--------------
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index ecf5c93e7ae807..07079b59824d4c 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -726,7 +726,7 @@ extern void uv_nmi_setup_hubless(void);
 #define UVH_TSC_SYNC_SHIFT_UV2K	16	/* UV2/3k have different bits */
 #define UVH_TSC_SYNC_MASK	3	/* 0011 */
 #define UVH_TSC_SYNC_VALID	3	/* 0011 */
-#define UVH_TSC_SYNC_INVALID	2	/* 0010 */
+#define UVH_TSC_SYNC_UNKNOWN	0	/* 0000 */
 
 /* BMC sets a bit this MMR non-zero before sending an NMI */
 #define UVH_NMI_MMR		UVH_BIOS_KERNEL_MMR
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 9b44f45704f796..9a83aa193bdf04 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -197,36 +197,32 @@ static void __init uv_tsc_check_sync(void)
 	int sync_state;
 	int mmr_shift;
 	char *state;
-	bool valid;
 
-	/* Accommodate different UV arch BIOSes */
+	/* Different returns from different UV BIOS versions */
 	mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR);
 	mmr_shift =
 		is_uv2_hub() ? UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT;
 	sync_state = (mmr >> mmr_shift) & UVH_TSC_SYNC_MASK;
 
+	/* Check if TSC is valid for all sockets */
 	switch (sync_state) {
 	case UVH_TSC_SYNC_VALID:
 		state = "in sync";
-		valid = true;
+		mark_tsc_async_resets("UV BIOS");
 		break;
 
-	case UVH_TSC_SYNC_INVALID:
-		state = "unstable";
-		valid = false;
+	/* If BIOS state unknown, don't do anything */
+	case UVH_TSC_SYNC_UNKNOWN:
+		state = "unknown";
 		break;
+
+	/* Otherwise, BIOS indicates problem with TSC */
 	default:
-		state = "unknown: assuming valid";
-		valid = true;
+		state = "unstable";
+		mark_tsc_unstable("UV BIOS");
 		break;
 	}
 	pr_info("UV: TSC sync state from BIOS:0%d(%s)\n", sync_state, state);
-
-	/* Mark flag that says TSC != 0 is valid for socket 0 */
-	if (valid)
-		mark_tsc_async_resets("UV BIOS");
-	else
-		mark_tsc_unstable("UV BIOS");
 }
 
 /* Selector for (4|4A|5) structs */

From ae5f8ce3c247b8d937782e76802a9036c09998ad Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:28 -0500
Subject: [PATCH 249/262] x86/platform/uv: Update for UV5 NMI MMR changes

The UV NMI MMR addresses and fields moved between UV4 and UV5
necessitating a rewrite of the UV NMI handler.  Adjust references
to accommodate those changes.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lkml.kernel.org/r/20201005203929.148656-13-mike.travis@hpe.com
---
 arch/x86/include/asm/uv/uv_hub.h | 13 -------
 arch/x86/platform/uv/uv_nmi.c    | 64 +++++++++++++++++++++++++++-----
 2 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 07079b59824d4c..610bda21a8d9ea 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -734,19 +734,6 @@ extern void uv_nmi_setup_hubless(void);
 #define UVH_NMI_MMR_SHIFT	63
 #define UVH_NMI_MMR_TYPE	"SCRATCH5"
 
-/* Newer SMM NMI handler, not present in all systems */
-#define UVH_NMI_MMRX		UVH_EVENT_OCCURRED0
-#define UVH_NMI_MMRX_CLEAR	UVH_EVENT_OCCURRED0_ALIAS
-#define UVH_NMI_MMRX_SHIFT	UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT
-#define UVH_NMI_MMRX_TYPE	"EXTIO_INT0"
-
-/* Non-zero indicates newer SMM NMI handler present */
-#define UVH_NMI_MMRX_SUPPORTED	UVH_EXTIO_INT0_BROADCAST
-
-/* Indicates to BIOS that we want to use the newer SMM NMI handler */
-#define UVH_NMI_MMRX_REQ	UVH_BIOS_KERNEL_MMR_ALIAS_2
-#define UVH_NMI_MMRX_REQ_SHIFT	62
-
 struct uv_hub_nmi_s {
 	raw_spinlock_t	nmi_lock;
 	atomic_t	in_nmi;		/* flag this node in UV NMI IRQ */
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index 9d08ff5a755eb6..eac26feb04612f 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -2,8 +2,8 @@
 /*
  * SGI NMI support routines
  *
- *  Copyright (c) 2009-2013 Silicon Graphics, Inc.  All Rights Reserved.
- *  Copyright (c) Mike Travis
+ * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (c) Mike Travis
  */
 
 #include <linux/cpu.h>
@@ -54,6 +54,20 @@ static struct uv_hub_nmi_s **uv_hub_nmi_list;
 
 DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi);
 
+/* Newer SMM NMI handler, not present in all systems */
+static unsigned long uvh_nmi_mmrx;		/* UVH_EVENT_OCCURRED0/1 */
+static unsigned long uvh_nmi_mmrx_clear;	/* UVH_EVENT_OCCURRED0/1_ALIAS */
+static int uvh_nmi_mmrx_shift;			/* UVH_EVENT_OCCURRED0/1_EXTIO_INT0_SHFT */
+static int uvh_nmi_mmrx_mask;			/* UVH_EVENT_OCCURRED0/1_EXTIO_INT0_MASK */
+static char *uvh_nmi_mmrx_type;			/* "EXTIO_INT0" */
+
+/* Non-zero indicates newer SMM NMI handler present */
+static unsigned long uvh_nmi_mmrx_supported;	/* UVH_EXTIO_INT0_BROADCAST */
+
+/* Indicates to BIOS that we want to use the newer SMM NMI handler */
+static unsigned long uvh_nmi_mmrx_req;		/* UVH_BIOS_KERNEL_MMR_ALIAS_2 */
+static int uvh_nmi_mmrx_req_shift;		/* 62 */
+
 /* UV hubless values */
 #define NMI_CONTROL_PORT	0x70
 #define NMI_DUMMY_PORT		0x71
@@ -227,13 +241,43 @@ static inline bool uv_nmi_action_is(const char *action)
 /* Setup which NMI support is present in system */
 static void uv_nmi_setup_mmrs(void)
 {
-	if (uv_read_local_mmr(UVH_NMI_MMRX_SUPPORTED)) {
-		uv_write_local_mmr(UVH_NMI_MMRX_REQ,
-					1UL << UVH_NMI_MMRX_REQ_SHIFT);
-		nmi_mmr = UVH_NMI_MMRX;
-		nmi_mmr_clear = UVH_NMI_MMRX_CLEAR;
-		nmi_mmr_pending = 1UL << UVH_NMI_MMRX_SHIFT;
-		pr_info("UV: SMI NMI support: %s\n", UVH_NMI_MMRX_TYPE);
+	/* First determine arch specific MMRs to handshake with BIOS */
+	if (UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK) {
+		uvh_nmi_mmrx = UVH_EVENT_OCCURRED0;
+		uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED0_ALIAS;
+		uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT;
+		uvh_nmi_mmrx_mask = UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK;
+		uvh_nmi_mmrx_type = "OCRD0-EXTIO_INT0";
+
+		uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
+		uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
+		uvh_nmi_mmrx_req_shift = 62;
+
+	} else if (UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK) {
+		uvh_nmi_mmrx = UVH_EVENT_OCCURRED1;
+		uvh_nmi_mmrx_clear = UVH_EVENT_OCCURRED1_ALIAS;
+		uvh_nmi_mmrx_shift = UVH_EVENT_OCCURRED1_EXTIO_INT0_SHFT;
+		uvh_nmi_mmrx_mask = UVH_EVENT_OCCURRED1_EXTIO_INT0_MASK;
+		uvh_nmi_mmrx_type = "OCRD1-EXTIO_INT0";
+
+		uvh_nmi_mmrx_supported = UVH_EXTIO_INT0_BROADCAST;
+		uvh_nmi_mmrx_req = UVH_BIOS_KERNEL_MMR_ALIAS_2;
+		uvh_nmi_mmrx_req_shift = 62;
+
+	} else {
+		pr_err("UV:%s:cannot find EVENT_OCCURRED*_EXTIO_INT0\n",
+			__func__);
+		return;
+	}
+
+	/* Then find out if new NMI is supported */
+	if (likely(uv_read_local_mmr(uvh_nmi_mmrx_supported))) {
+		uv_write_local_mmr(uvh_nmi_mmrx_req,
+					1UL << uvh_nmi_mmrx_req_shift);
+		nmi_mmr = uvh_nmi_mmrx;
+		nmi_mmr_clear = uvh_nmi_mmrx_clear;
+		nmi_mmr_pending = 1UL << uvh_nmi_mmrx_shift;
+		pr_info("UV: SMI NMI support: %s\n", uvh_nmi_mmrx_type);
 	} else {
 		nmi_mmr = UVH_NMI_MMR;
 		nmi_mmr_clear = UVH_NMI_MMR_CLEAR;
@@ -1049,5 +1093,5 @@ void __init uv_nmi_setup_hubless(void)
 	/* Ensure NMI enabled in Processor Interface Reg: */
 	uv_reassert_nmi();
 	uv_register_nmi_notifier();
-	pr_info("UV: Hubless NMI enabled\n");
+	pr_info("UV: PCH NMI enabled\n");
 }

From 7a6d94f0ed957fb667d4d74c5c6c640a26e87c8f Mon Sep 17 00:00:00 2001
From: Mike Travis <mike.travis@hpe.com>
Date: Mon, 5 Oct 2020 15:39:29 -0500
Subject: [PATCH 250/262] x86/platform/uv: Update Copyrights to conform to HPE
 standards

Add Copyrights to those files that have been updated for UV5 changes.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201005203929.148656-14-mike.travis@hpe.com
---
 arch/x86/include/asm/uv/bios.h      | 1 +
 arch/x86/include/asm/uv/uv_hub.h    | 1 +
 arch/x86/include/asm/uv/uv_mmrs.h   | 1 +
 arch/x86/kernel/apic/x2apic_uv_x.c  | 1 +
 arch/x86/platform/uv/bios_uv.c      | 1 +
 arch/x86/platform/uv/uv_nmi.c       | 1 +
 arch/x86/platform/uv/uv_time.c      | 1 +
 drivers/misc/sgi-gru/grufile.c      | 1 +
 drivers/misc/sgi-xp/xp.h            | 1 +
 drivers/misc/sgi-xp/xp_main.c       | 1 +
 drivers/misc/sgi-xp/xp_uv.c         | 1 +
 drivers/misc/sgi-xp/xpc_main.c      | 1 +
 drivers/misc/sgi-xp/xpc_partition.c | 1 +
 drivers/misc/sgi-xp/xpnet.c         | 1 +
 14 files changed, 14 insertions(+)

diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 97ac595ebc6a37..08b3d810dfbaf7 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -5,6 +5,7 @@
 /*
  * UV BIOS layer definitions.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
  * Copyright (c) Russ Anderson <rja@sgi.com>
  */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 610bda21a8d9ea..5002f52be33284 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -5,6 +5,7 @@
  *
  * SGI UV architectural definitions
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
  */
 
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index 06ea2d1aaa3e44..57fa6737326296 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -5,6 +5,7 @@
  *
  * HPE UV MMR definitions
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2016 Silicon Graphics, Inc. All rights reserved.
  */
 
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 9a83aa193bdf04..714233cee0b576 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,6 +5,7 @@
  *
  * SGI UV APIC functions (note: not an Intel compatible APIC)
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
  */
 #include <linux/crash_dump.h>
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index b148b4c8c2ec32..54511eaccf4d8e 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -2,6 +2,7 @@
 /*
  * BIOS run time interface routines.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
  * Copyright (c) Russ Anderson <rja@sgi.com>
  */
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index eac26feb04612f..0f5cbcf0da632a 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -2,6 +2,7 @@
 /*
  * SGI NMI support routines
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2007-2017 Silicon Graphics, Inc. All rights reserved.
  * Copyright (c) Mike Travis
  */
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index d9967350a1ab94..54663f3e00cbd8 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -2,6 +2,7 @@
 /*
  * SGI RTC clock/timer routines.
  *
+ *  (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  *  Copyright (c) 2009-2013 Silicon Graphics, Inc.  All Rights Reserved.
  *  Copyright (c) Dimitri Sivanich
  */
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index 18aa8c877bf813..7ffcfc0bb58723 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -7,6 +7,7 @@
  * This file supports the user system call for file open, close, mmap, etc.
  * This also incudes the driver initialization code.
  *
+ *  (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  *  Copyright (c) 2008-2014 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 2b6aabc61c6a95..9f9af77f8d2e13 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -3,6 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved.
  */
 
diff --git a/drivers/misc/sgi-xp/xp_main.c b/drivers/misc/sgi-xp/xp_main.c
index 0eea2f518967db..cf2965aa5c0556 100644
--- a/drivers/misc/sgi-xp/xp_main.c
+++ b/drivers/misc/sgi-xp/xp_main.c
@@ -3,6 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c
index 5dcca03b26cb39..19fc7076af274f 100644
--- a/drivers/misc/sgi-xp/xp_uv.c
+++ b/drivers/misc/sgi-xp/xp_uv.c
@@ -3,6 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c
index e3261e63d1f653..e5244fc1dab302 100644
--- a/drivers/misc/sgi-xp/xpc_main.c
+++ b/drivers/misc/sgi-xp/xpc_main.c
@@ -3,6 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (c) 2004-2009 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c
index a47b3bd75c08f9..57df06820bae2b 100644
--- a/drivers/misc/sgi-xp/xpc_partition.c
+++ b/drivers/misc/sgi-xp/xpc_partition.c
@@ -3,6 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 8ee39910b30942..23837d0d6f4a10 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -3,6 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
+ * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
  * Copyright (C) 1999-2009 Silicon Graphics, Inc. All rights reserved.
  */
 

From 41ce0564bfe2e129d56730418d8c0a9f9f2d31b5 Mon Sep 17 00:00:00 2001
From: Youquan Song <youquan.song@intel.com>
Date: Tue, 6 Oct 2020 14:09:05 -0700
Subject: [PATCH 251/262] x86/mce: Pass pointer to saved pt_regs to severity
 calculation routines

New recovery features require additional information about processor
state when a machine check occurred. Pass pt_regs down to the routines
that need it.

No functional change.

Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-2-tony.luck@intel.com
---
 arch/x86/kernel/cpu/mce/core.c     | 14 +++++++-------
 arch/x86/kernel/cpu/mce/internal.h |  3 ++-
 arch/x86/kernel/cpu/mce/severity.c | 14 ++++++++------
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b5b70f4b351dae..2d6caf09e8e6a9 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -807,7 +807,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 			goto clear_it;
 
 		mce_read_aux(&m, i);
-		m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+		m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
 		/*
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
@@ -856,7 +856,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			quirk_no_way_out(i, m, regs);
 
 		m->bank = i;
-		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+		if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
 			mce_read_aux(m, i);
 			*msg = tmp;
 			return 1;
@@ -956,7 +956,7 @@ static void mce_reign(void)
 	 */
 	if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
 		/* call mce_severity() to get "msg" for panic */
-		mce_severity(m, mca_cfg.tolerant, &msg, true);
+		mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
 		mce_panic("Fatal machine check", m, msg);
 	}
 
@@ -1167,7 +1167,7 @@ static noinstr bool mce_check_crashing_cpu(void)
 	return false;
 }
 
-static void __mc_scan_banks(struct mce *m, struct mce *final,
+static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
 			    unsigned long *toclear, unsigned long *valid_banks,
 			    int no_way_out, int *worst)
 {
@@ -1202,7 +1202,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
 		/* Set taint even when machine check was not enabled. */
 		add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
-		severity = mce_severity(m, cfg->tolerant, NULL, true);
+		severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
 
 		/*
 		 * When machine check was for corrected/deferred handler don't
@@ -1354,7 +1354,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
 		order = mce_start(&no_way_out);
 	}
 
-	__mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
+	__mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
 
 	if (!no_way_out)
 		mce_clear_state(toclear);
@@ -1376,7 +1376,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
 		 * make sure we have the right "msg".
 		 */
 		if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
-			mce_severity(&m, cfg->tolerant, &msg, true);
+			mce_severity(&m, regs, cfg->tolerant, &msg, true);
 			mce_panic("Local fatal machine check!", &m, msg);
 		}
 	}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index b122610e9046a5..88dcc79cfb07de 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -38,7 +38,8 @@ int mce_gen_pool_add(struct mce *mce);
 int mce_gen_pool_init(void);
 struct llist_node *mce_gen_pool_prepare_records(void);
 
-extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
+extern int (*mce_severity)(struct mce *a, struct pt_regs *regs,
+			   int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern mce_banks_t mce_banks_ce_disabled;
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index e0722461bb579a..0b072dc231ad0c 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -223,7 +223,7 @@ static struct severity {
  * distinguish an exception taken in user from from one
  * taken in the kernel.
  */
-static int error_context(struct mce *m)
+static int error_context(struct mce *m, struct pt_regs *regs)
 {
 	if ((m->cs & 3) == 3)
 		return IN_USER;
@@ -267,9 +267,10 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
  * See AMD Error Scope Hierarchy table in a newer BKDG. For example
  * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
  */
-static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
+			    char **msg, bool is_excp)
 {
-	enum context ctx = error_context(m);
+	enum context ctx = error_context(m, regs);
 
 	/* Processor Context Corrupt, no need to fumble too much, die! */
 	if (m->status & MCI_STATUS_PCC)
@@ -319,10 +320,11 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
 	return MCE_KEEP_SEVERITY;
 }
 
-static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
+			      int tolerant, char **msg, bool is_excp)
 {
 	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
-	enum context ctx = error_context(m);
+	enum context ctx = error_context(m, regs);
 	struct severity *s;
 
 	for (s = severities;; s++) {
@@ -356,7 +358,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
 }
 
 /* Default to mce_severity_intel */
-int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+int (*mce_severity)(struct mce *m, struct pt_regs *regs, int tolerant, char **msg, bool is_excp) =
 		    mce_severity_intel;
 
 void __init mcheck_vendor_init_severity(void)

From a05d54c41ecfa1a322b229b4e5ce50c157284f74 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Tue, 6 Oct 2020 14:09:06 -0700
Subject: [PATCH 252/262] x86/mce: Provide method to find out the type of an
 exception handler

Avoid a proliferation of ex_has_*_handler() functions by having just
one function that returns the type of the handler (if any).

Drop the __visible attribute for this function. It is not called
from assembler so the attribute is not necessary.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-3-tony.luck@intel.com
---
 arch/x86/include/asm/extable.h     |  9 ++++++++-
 arch/x86/kernel/cpu/mce/severity.c |  5 ++++-
 arch/x86/mm/extable.c              | 12 ++++++++----
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index d8c2198d543b46..1f0cbc52937ca5 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -29,10 +29,17 @@ struct pt_regs;
 		(b)->handler = (tmp).handler - (delta);		\
 	} while (0)
 
+enum handler_type {
+	EX_HANDLER_NONE,
+	EX_HANDLER_FAULT,
+	EX_HANDLER_UACCESS,
+	EX_HANDLER_OTHER
+};
+
 extern int fixup_exception(struct pt_regs *regs, int trapnr,
 			   unsigned long error_code, unsigned long fault_addr);
 extern int fixup_bug(struct pt_regs *regs, int trapnr);
-extern bool ex_has_fault_handler(unsigned long ip);
+extern enum handler_type ex_get_fault_handler_type(unsigned long ip);
 extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
 
 #endif
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 0b072dc231ad0c..c6494e6579c1e9 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -225,10 +225,13 @@ static struct severity {
  */
 static int error_context(struct mce *m, struct pt_regs *regs)
 {
+	enum handler_type t;
+
 	if ((m->cs & 3) == 3)
 		return IN_USER;
 
-	if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip)) {
+	t = ex_get_fault_handler_type(m->ip);
+	if (mc_recoverable(m->mcgstatus) && t == EX_HANDLER_FAULT) {
 		m->kflags |= MCE_IN_KERNEL_RECOV;
 		return IN_KERNEL_RECOV;
 	}
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 1d6cb07f4f86f5..de43525df69b6a 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -125,17 +125,21 @@ __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL(ex_handler_clear_fs);
 
-__visible bool ex_has_fault_handler(unsigned long ip)
+enum handler_type ex_get_fault_handler_type(unsigned long ip)
 {
 	const struct exception_table_entry *e;
 	ex_handler_t handler;
 
 	e = search_exception_tables(ip);
 	if (!e)
-		return false;
+		return EX_HANDLER_NONE;
 	handler = ex_fixup_handler(e);
-
-	return handler == ex_handler_fault;
+	if (handler == ex_handler_fault)
+		return EX_HANDLER_FAULT;
+	else if (handler == ex_handler_uaccess)
+		return EX_HANDLER_UACCESS;
+	else
+		return EX_HANDLER_OTHER;
 }
 
 int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,

From 278b917f8cb9b02923c15249f9d1a5769d2c1976 Mon Sep 17 00:00:00 2001
From: Youquan Song <youquan.song@intel.com>
Date: Tue, 6 Oct 2020 14:09:07 -0700
Subject: [PATCH 253/262] x86/mce: Add _ASM_EXTABLE_CPY for copy user access

_ASM_EXTABLE_UA is a general exception entry to record the exception fixup
for all exception spots between kernel and user space access.

To enable recovery from machine checks while coping data from user
addresses it is necessary to be able to distinguish the places that are
looping copying data from those that copy a single byte/word/etc.

Add a new macro _ASM_EXTABLE_CPY and use it in place of _ASM_EXTABLE_UA
in the copy functions.

Record the exception reason number to regs->ax at
ex_handler_uaccess which is used to check MCE triggered.

The new fixup routine ex_handler_copy() is almost an exact copy of
ex_handler_uaccess() The difference is that it sets regs->ax to the trap
number. Following patches use this to avoid trying to copy remaining
bytes from the tail of the copy and possibly hitting the poison again.

New mce.kflags bit MCE_IN_KERNEL_COPYIN will be used by mce_severity()
calculation to indicate that a machine check is recoverable because the
kernel was copying from user space.

Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-4-tony.luck@intel.com
---
 arch/x86/include/asm/asm.h  |  6 +++
 arch/x86/include/asm/mce.h  | 15 ++++++
 arch/x86/lib/copy_user_64.S | 96 ++++++++++++++++++-------------------
 arch/x86/mm/extable.c       | 14 +++++-
 4 files changed, 82 insertions(+), 49 deletions(-)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 5c15f95b1ba75a..0359cbbd0f50c1 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -135,6 +135,9 @@
 # define _ASM_EXTABLE_UA(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
 
+# define _ASM_EXTABLE_CPY(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
 # define _ASM_EXTABLE_FAULT(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
 
@@ -160,6 +163,9 @@
 # define _ASM_EXTABLE_UA(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
 
+# define _ASM_EXTABLE_CPY(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
 # define _ASM_EXTABLE_FAULT(from, to)				\
 	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
 
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ba2062d6df92a8..a0f147893a0414 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -136,8 +136,23 @@
 #define	MCE_HANDLED_NFIT	BIT_ULL(3)
 #define	MCE_HANDLED_EDAC	BIT_ULL(4)
 #define	MCE_HANDLED_MCELOG	BIT_ULL(5)
+
+/*
+ * Indicates an MCE which has happened in kernel space but from
+ * which the kernel can recover simply by executing fixup_exception()
+ * so that an error is returned to the caller of the function that
+ * hit the machine check.
+ */
 #define MCE_IN_KERNEL_RECOV	BIT_ULL(6)
 
+/*
+ * Indicates an MCE that happened in kernel space while copying data
+ * from user. In this case fixup_exception() gets the kernel to the
+ * error exit for the copy function. Machine check handler can then
+ * treat it like a fault taken in user mode.
+ */
+#define MCE_IN_KERNEL_COPYIN	BIT_ULL(7)
+
 /*
  * This structure contains all data related to the MCE log.  Also
  * carries a signature to make it easier to find from external
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 816f128a6d5272..5b68e945bf655a 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -36,8 +36,8 @@
 	jmp .Lcopy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE_UA(100b, 103b)
-	_ASM_EXTABLE_UA(101b, 103b)
+	_ASM_EXTABLE_CPY(100b, 103b)
+	_ASM_EXTABLE_CPY(101b, 103b)
 	.endm
 
 /*
@@ -116,26 +116,26 @@ SYM_FUNC_START(copy_user_generic_unrolled)
 60:	jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
 	.previous
 
-	_ASM_EXTABLE_UA(1b, 30b)
-	_ASM_EXTABLE_UA(2b, 30b)
-	_ASM_EXTABLE_UA(3b, 30b)
-	_ASM_EXTABLE_UA(4b, 30b)
-	_ASM_EXTABLE_UA(5b, 30b)
-	_ASM_EXTABLE_UA(6b, 30b)
-	_ASM_EXTABLE_UA(7b, 30b)
-	_ASM_EXTABLE_UA(8b, 30b)
-	_ASM_EXTABLE_UA(9b, 30b)
-	_ASM_EXTABLE_UA(10b, 30b)
-	_ASM_EXTABLE_UA(11b, 30b)
-	_ASM_EXTABLE_UA(12b, 30b)
-	_ASM_EXTABLE_UA(13b, 30b)
-	_ASM_EXTABLE_UA(14b, 30b)
-	_ASM_EXTABLE_UA(15b, 30b)
-	_ASM_EXTABLE_UA(16b, 30b)
-	_ASM_EXTABLE_UA(18b, 40b)
-	_ASM_EXTABLE_UA(19b, 40b)
-	_ASM_EXTABLE_UA(21b, 50b)
-	_ASM_EXTABLE_UA(22b, 50b)
+	_ASM_EXTABLE_CPY(1b, 30b)
+	_ASM_EXTABLE_CPY(2b, 30b)
+	_ASM_EXTABLE_CPY(3b, 30b)
+	_ASM_EXTABLE_CPY(4b, 30b)
+	_ASM_EXTABLE_CPY(5b, 30b)
+	_ASM_EXTABLE_CPY(6b, 30b)
+	_ASM_EXTABLE_CPY(7b, 30b)
+	_ASM_EXTABLE_CPY(8b, 30b)
+	_ASM_EXTABLE_CPY(9b, 30b)
+	_ASM_EXTABLE_CPY(10b, 30b)
+	_ASM_EXTABLE_CPY(11b, 30b)
+	_ASM_EXTABLE_CPY(12b, 30b)
+	_ASM_EXTABLE_CPY(13b, 30b)
+	_ASM_EXTABLE_CPY(14b, 30b)
+	_ASM_EXTABLE_CPY(15b, 30b)
+	_ASM_EXTABLE_CPY(16b, 30b)
+	_ASM_EXTABLE_CPY(18b, 40b)
+	_ASM_EXTABLE_CPY(19b, 40b)
+	_ASM_EXTABLE_CPY(21b, 50b)
+	_ASM_EXTABLE_CPY(22b, 50b)
 SYM_FUNC_END(copy_user_generic_unrolled)
 EXPORT_SYMBOL(copy_user_generic_unrolled)
 
@@ -180,8 +180,8 @@ SYM_FUNC_START(copy_user_generic_string)
 	jmp .Lcopy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE_UA(1b, 11b)
-	_ASM_EXTABLE_UA(3b, 12b)
+	_ASM_EXTABLE_CPY(1b, 11b)
+	_ASM_EXTABLE_CPY(3b, 12b)
 SYM_FUNC_END(copy_user_generic_string)
 EXPORT_SYMBOL(copy_user_generic_string)
 
@@ -213,7 +213,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_string)
 	jmp .Lcopy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE_UA(1b, 12b)
+	_ASM_EXTABLE_CPY(1b, 12b)
 SYM_FUNC_END(copy_user_enhanced_fast_string)
 EXPORT_SYMBOL(copy_user_enhanced_fast_string)
 
@@ -237,7 +237,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
 	ASM_CLAC
 	ret
 
-	_ASM_EXTABLE_UA(1b, 2b)
+	_ASM_EXTABLE_CPY(1b, 2b)
 SYM_CODE_END(.Lcopy_user_handle_tail)
 
 /*
@@ -366,27 +366,27 @@ SYM_FUNC_START(__copy_user_nocache)
 	jmp .Lcopy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
-	_ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
-	_ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
-	_ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
-	_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
-	_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
-	_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
+	_ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
+	_ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
+	_ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
+	_ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
+	_ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
+	_ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
+	_ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
 SYM_FUNC_END(__copy_user_nocache)
 EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index de43525df69b6a..5829457f7ca339 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -80,6 +80,18 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL(ex_handler_uaccess);
 
+__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
+			       struct pt_regs *regs, int trapnr,
+			       unsigned long error_code,
+			       unsigned long fault_addr)
+{
+	WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
+	regs->ip = ex_fixup_addr(fixup);
+	regs->ax = trapnr;
+	return true;
+}
+EXPORT_SYMBOL(ex_handler_copy);
+
 __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
 				       struct pt_regs *regs, int trapnr,
 				       unsigned long error_code,
@@ -136,7 +148,7 @@ enum handler_type ex_get_fault_handler_type(unsigned long ip)
 	handler = ex_fixup_handler(e);
 	if (handler == ex_handler_fault)
 		return EX_HANDLER_FAULT;
-	else if (handler == ex_handler_uaccess)
+	else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
 		return EX_HANDLER_UACCESS;
 	else
 		return EX_HANDLER_OTHER;

From a2f73400e4dfd13f673c6e1b4b98d180fd1e47b3 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Tue, 6 Oct 2020 14:09:08 -0700
Subject: [PATCH 254/262] x86/mce: Avoid tail copy when machine check
 terminated a copy from user

In the page fault case it is ok to see if a few more unaligned bytes
can be copied from the source address. Worst case is that the page fault
will be triggered again.

Machine checks are more serious. Just give up at the point where the
main copy loop triggered the #MC and return from the copy code as if
the copy succeeded. The machine check handler will use task_work_add() to
make sure that the task is sent a SIGBUS.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-5-tony.luck@intel.com
---
 arch/x86/lib/copy_user_64.S | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 5b68e945bf655a..77b9b2a3b5c84d 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,6 +15,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/export.h>
+#include <asm/trapnr.h>
 
 .macro ALIGN_DESTINATION
 	/* check for bad alignment of destination */
@@ -221,6 +222,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
  * Try to copy last bytes and clear the rest if needed.
  * Since protection fault in copy_from/to_user is not a normal situation,
  * it is not necessary to optimize tail handling.
+ * Don't try to copy the tail if machine check happened
  *
  * Input:
  * rdi destination
@@ -232,11 +234,24 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
  */
 SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
 	movl %edx,%ecx
+	cmp $X86_TRAP_MC,%eax		/* check if X86_TRAP_MC */
+	je 3f
 1:	rep movsb
 2:	mov %ecx,%eax
 	ASM_CLAC
 	ret
 
+	/*
+	 * Return zero to pretend that this copy succeeded. This
+	 * is counter-intuitive, but needed to prevent the code
+	 * in lib/iov_iter.c from retrying and running back into
+	 * the poison cache line again. The machine check handler
+	 * will ensure that a SIGBUS is sent to the task.
+	 */
+3:	xorl %eax,%eax
+	ASM_CLAC
+	ret
+
 	_ASM_EXTABLE_CPY(1b, 2b)
 SYM_CODE_END(.Lcopy_user_handle_tail)
 

From c0ab7ffce275d3f83bd253c70889c28821d4a41d Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Tue, 6 Oct 2020 14:09:09 -0700
Subject: [PATCH 255/262] x86/mce: Recover from poison found while copying from
 user space

Existing kernel code can only recover from a machine check on code that
is tagged in the exception table with a fault handling recovery path.

Add two new fields in the task structure to pass information from
machine check handler to the "task_work" that is queued to run before
the task returns to user mode:

+ mce_vaddr: will be initialized to the user virtual address of the fault
  in the case where the fault occurred in the kernel copying data from
  a user address.  This is so that kill_me_maybe() can provide that
  information to the user SIGBUS handler.

+ mce_kflags: copy of the struct mce.kflags needed by kill_me_maybe()
  to determine if mce_vaddr is applicable to this error.

Add code to recover from a machine check while copying data from user
space to the kernel. Action for this case is the same as if the user
touched the poison directly; unmap the page and send a SIGBUS to the task.

Use a new helper function to share common code between the "fault
in user mode" case and the "fault while copying from user" case.

New code paths will be activated by the next patch which sets
MCE_IN_KERNEL_COPYIN.

Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-6-tony.luck@intel.com
---
 arch/x86/kernel/cpu/mce/core.c | 27 ++++++++++++++++++++-------
 include/linux/sched.h          |  2 ++
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2d6caf09e8e6a9..5c423c4bab6828 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1260,6 +1260,21 @@ static void kill_me_maybe(struct callback_head *cb)
 	kill_me_now(cb);
 }
 
+static void queue_task_work(struct mce *m, int kill_it)
+{
+	current->mce_addr = m->addr;
+	current->mce_kflags = m->kflags;
+	current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
+	current->mce_whole_page = whole_page(m);
+
+	if (kill_it)
+		current->mce_kill_me.func = kill_me_now;
+	else
+		current->mce_kill_me.func = kill_me_maybe;
+
+	task_work_add(current, &current->mce_kill_me, true);
+}
+
 /*
  * The actual machine check handler. This only handles real
  * exceptions when something got corrupted coming in through int 18.
@@ -1401,13 +1416,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
 		/* If this triggers there is no way to recover. Die hard. */
 		BUG_ON(!on_thread_stack() || !user_mode(regs));
 
-		current->mce_addr = m.addr;
-		current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
-		current->mce_whole_page = whole_page(&m);
-		current->mce_kill_me.func = kill_me_maybe;
-		if (kill_it)
-			current->mce_kill_me.func = kill_me_now;
-		task_work_add(current, &current->mce_kill_me, true);
+		queue_task_work(&m, kill_it);
+
 	} else {
 		/*
 		 * Handle an MCE which has happened in kernel space but from
@@ -1422,6 +1432,9 @@ noinstr void do_machine_check(struct pt_regs *regs)
 			if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
 				mce_panic("Failed kernel mode recovery", &m, msg);
 		}
+
+		if (m.kflags & MCE_IN_KERNEL_COPYIN)
+			queue_task_work(&m, kill_it);
 	}
 out:
 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 93ecd930efd316..2cbba3e2b1504f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1308,6 +1308,8 @@ struct task_struct {
 #endif
 
 #ifdef CONFIG_X86_MCE
+	void __user			*mce_vaddr;
+	__u64				mce_kflags;
 	u64				mce_addr;
 	__u64				mce_ripv : 1,
 					mce_whole_page : 1,

From 300638101329e8f1569115f3d7197ef5ef754a3a Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Tue, 6 Oct 2020 14:09:10 -0700
Subject: [PATCH 256/262] x86/mce: Decode a kernel instruction to determine if
 it is copying from user

All instructions copying data between kernel and user memory
are tagged with either _ASM_EXTABLE_UA or _ASM_EXTABLE_CPY
entries in the exception table. ex_fault_handler_type() returns
EX_HANDLER_UACCESS for both of these.

Recovery is only possible when the machine check was triggered
on a read from user memory. In this case the same strategy for
recovery applies as if the user had made the access in ring3. If
the fault was in kernel memory while copying to user there is no
current recovery plan.

For MOV and MOVZ instructions a full decode of the instruction
is done to find the source address. For MOVS instructions
the source address is in the %rsi register. The function
fault_in_kernel_space() determines whether the source address is
kernel or user, upgrade it from "static" so it can be used here.

Co-developed-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-7-tony.luck@intel.com
---
 arch/x86/include/asm/traps.h       |  2 ++
 arch/x86/kernel/cpu/mce/core.c     | 11 +++++--
 arch/x86/kernel/cpu/mce/severity.c | 53 +++++++++++++++++++++++++++++-
 arch/x86/mm/fault.c                |  2 +-
 4 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 714b1a30e7b09a..df0b7bfc123495 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -35,6 +35,8 @@ extern int panic_on_unrecovered_nmi;
 
 void math_emulate(struct math_emu_info *);
 
+bool fault_in_kernel_space(unsigned long address);
+
 #ifdef CONFIG_VMAP_STACK
 void __noreturn handle_stack_overflow(const char *message,
 				      struct pt_regs *regs,
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5c423c4bab6828..3d6e1bfa4f9d71 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1250,14 +1250,19 @@ static void kill_me_maybe(struct callback_head *cb)
 	if (!p->mce_ripv)
 		flags |= MF_MUST_KILL;
 
-	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
+	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
+	    !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
 		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
 		sync_core();
 		return;
 	}
 
-	pr_err("Memory error not recovered");
-	kill_me_now(cb);
+	if (p->mce_vaddr != (void __user *)-1l) {
+		force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
+	} else {
+		pr_err("Memory error not recovered");
+		kill_me_now(cb);
+	}
 }
 
 static void queue_task_work(struct mce *m, int kill_it)
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index c6494e6579c1e9..83df991314c531 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -13,6 +13,9 @@
 
 #include <asm/mce.h>
 #include <asm/intel-family.h>
+#include <asm/traps.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
 
 #include "internal.h"
 
@@ -212,6 +215,47 @@ static struct severity {
 #define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
 				(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
 
+static bool is_copy_from_user(struct pt_regs *regs)
+{
+	u8 insn_buf[MAX_INSN_SIZE];
+	struct insn insn;
+	unsigned long addr;
+
+	if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+		return false;
+
+	kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+	insn_get_opcode(&insn);
+	if (!insn.opcode.got)
+		return false;
+
+	switch (insn.opcode.value) {
+	/* MOV mem,reg */
+	case 0x8A: case 0x8B:
+	/* MOVZ mem,reg */
+	case 0xB60F: case 0xB70F:
+		insn_get_modrm(&insn);
+		insn_get_sib(&insn);
+		if (!insn.modrm.got || !insn.sib.got)
+			return false;
+		addr = (unsigned long)insn_get_addr_ref(&insn, regs);
+		break;
+	/* REP MOVS */
+	case 0xA4: case 0xA5:
+		addr = regs->si;
+		break;
+	default:
+		return false;
+	}
+
+	if (fault_in_kernel_space(addr))
+		return false;
+
+	current->mce_vaddr = (void __user *)addr;
+
+	return true;
+}
+
 /*
  * If mcgstatus indicated that ip/cs on the stack were
  * no good, then "m->cs" will be zero and we will have
@@ -229,10 +273,17 @@ static int error_context(struct mce *m, struct pt_regs *regs)
 
 	if ((m->cs & 3) == 3)
 		return IN_USER;
+	if (!mc_recoverable(m->mcgstatus))
+		return IN_KERNEL;
 
 	t = ex_get_fault_handler_type(m->ip);
-	if (mc_recoverable(m->mcgstatus) && t == EX_HANDLER_FAULT) {
+	if (t == EX_HANDLER_FAULT) {
+		m->kflags |= MCE_IN_KERNEL_RECOV;
+		return IN_KERNEL_RECOV;
+	}
+	if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
 		m->kflags |= MCE_IN_KERNEL_RECOV;
+		m->kflags |= MCE_IN_KERNEL_COPYIN;
 		return IN_KERNEL_RECOV;
 	}
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498e98324f..88ae443e4e5f9e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1081,7 +1081,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 	return 0;
 }
 
-static int fault_in_kernel_space(unsigned long address)
+bool fault_in_kernel_space(unsigned long address)
 {
 	/*
 	 * On 64-bit systems, the vsyscall page is at an address above

From d433ab42fdc2c8a32e5df7d53833310f0ab9822c Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Tue, 6 Oct 2020 20:44:53 +0100
Subject: [PATCH 257/262] arm64: random: Remove no longer needed prototypes

Commit 9bceb80b3cc4 ("arm64: kaslr: Use standard early random
function") removed the direct calls of the __arm64_rndr() and
__early_cpu_has_rndr() functions, but left the dummy prototypes in the
 #else branch of the #ifdef CONFIG_ARCH_RANDOM guard.

Remove the redundant prototypes, as they have no users outside of
this header file.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20201006194453.36519-1-andre.przywara@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/archrandom.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h
index 44209f6146aa19..ffb1a40d5475e8 100644
--- a/arch/arm64/include/asm/archrandom.h
+++ b/arch/arm64/include/asm/archrandom.h
@@ -79,10 +79,5 @@ arch_get_random_seed_long_early(unsigned long *v)
 }
 #define arch_get_random_seed_long_early arch_get_random_seed_long_early
 
-#else
-
-static inline bool __arm64_rndr(unsigned long *v) { return false; }
-static inline bool __init __early_cpu_has_rndr(void) { return false; }
-
 #endif /* CONFIG_ARCH_RANDOM */
 #endif /* _ASM_ARCHRANDOM_H */

From 0888e1030d3e3e5ce9dfd8e030cf13a2e9a1519a Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 5 Oct 2020 08:11:22 -0700
Subject: [PATCH 258/262] x86/asm: Carve out a generic movdir64b() helper for
 general usage

Carve out the MOVDIR64B inline asm primitive into a generic helper so
that it can be used by other functions. Move it to special_insns.h and
have iosubmit_cmds512() call it.

 [ bp: Massage commit message. ]

Suggested-by: Michael Matz <matz@suse.de>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201005151126.657029-2-dave.jiang@intel.com
---
 arch/x86/include/asm/io.h            | 17 +++--------------
 arch/x86/include/asm/special_insns.h | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index e1aa17a468a858..d726459d08e5a1 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -401,7 +401,7 @@ extern bool phys_mem_access_encrypted(unsigned long phys_addr,
 
 /**
  * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
- * @__dst: destination, in MMIO space (must be 512-bit aligned)
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
  * @src: source
  * @count: number of 512 bits quantities to submit
  *
@@ -412,25 +412,14 @@ extern bool phys_mem_access_encrypted(unsigned long phys_addr,
  * Warning: Do not use this helper unless your driver has checked that the CPU
  * instruction is supported on the platform.
  */
-static inline void iosubmit_cmds512(void __iomem *__dst, const void *src,
+static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
 				    size_t count)
 {
-	/*
-	 * Note that this isn't an "on-stack copy", just definition of "dst"
-	 * as a pointer to 64-bytes of stuff that is going to be overwritten.
-	 * In the MOVDIR64B case that may be needed as you can use the
-	 * MOVDIR64B instruction to copy arbitrary memory around. This trick
-	 * lets the compiler know how much gets clobbered.
-	 */
-	volatile struct { char _[64]; } *dst = __dst;
 	const u8 *from = src;
 	const u8 *end = from + count * 64;
 
 	while (from < end) {
-		/* MOVDIR64B [rdx], rax */
-		asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
-			     : "=m" (dst)
-			     : "d" (from), "a" (dst));
+		movdir64b(dst, from);
 		from += 64;
 	}
 }
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 59a3e13204c348..d4baa0eb556489 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -234,6 +234,28 @@ static inline void clwb(volatile void *__p)
 
 #define nop() asm volatile ("nop")
 
+/* The dst parameter must be 64-bytes aligned */
+static inline void movdir64b(void *dst, const void *src)
+{
+	const struct { char _[64]; } *__src = src;
+	struct { char _[64]; } *__dst = dst;
+
+	/*
+	 * MOVDIR64B %(rdx), rax.
+	 *
+	 * Both __src and __dst must be memory constraints in order to tell the
+	 * compiler that no other memory accesses should be reordered around
+	 * this one.
+	 *
+	 * Also, both must be supplied as lvalues because this tells
+	 * the compiler what the object is (its size) the instruction accesses.
+	 * I.e., not the pointers but what they point to, thus the deref'ing '*'.
+	 */
+	asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+		     : "+m" (*__dst)
+		     :  "m" (*__src), "a" (__dst), "d" (__src));
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_SPECIAL_INSNS_H */

From 7f5933f81bd85a0bf6a87d65c7327ea048a75e54 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 5 Oct 2020 08:11:23 -0700
Subject: [PATCH 259/262] x86/asm: Add an enqcmds() wrapper for the ENQCMDS
 instruction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, the MOVDIR64B instruction is used to atomically submit
64-byte work descriptors to devices. Although it can encounter errors
like device queue full, command not accepted, device not ready, etc when
writing to a device MMIO, MOVDIR64B can not report back on errors from
the device itself. This means that MOVDIR64B users need to separately
interact with a device to see if a descriptor was successfully queued,
which slows down device interactions.

ENQCMD and ENQCMDS also atomically submit 64-byte work descriptors
to devices. But, they *can* report back errors directly from the
device, such as if the device was busy, or device not enabled or does
not support the command. This immediate feedback from the submission
instruction itself reduces the number of interactions with the device
and can greatly increase efficiency.

ENQCMD can be used at any privilege level, but can effectively only
submit work on behalf of the current process. ENQCMDS is a ring0-only
instruction and can explicitly specify a process context instead of
being tied to the current process or needing to reprogram the IA32_PASID
MSR.

Use ENQCMDS for work submission within the kernel because a Process
Address ID (PASID) is setup to translate the kernel virtual address
space. This PASID is provided to ENQCMDS from the descriptor structure
submitted to the device and not retrieved from IA32_PASID MSR, which is
setup for the current user address space.

See Intel Software Developer’s Manual for more information on the
instructions.

 [ bp:
   - Make operand constraints like movdir64b() because both insns are
     basically doing the same thing, more or less.
   - Fixup comments and cleanup. ]

Link: https://lkml.kernel.org/r/20200924180041.34056-3-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20201005151126.657029-3-dave.jiang@intel.com
---
 arch/x86/include/asm/special_insns.h | 42 ++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index d4baa0eb556489..5482c2d37f5445 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -256,6 +256,48 @@ static inline void movdir64b(void *dst, const void *src)
 		     :  "m" (*__src), "a" (__dst), "d" (__src));
 }
 
+/**
+ * enqcmds - Enqueue a command in supervisor (CPL0) mode
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: 512 bits memory operand
+ *
+ * The ENQCMDS instruction allows software to write a 512-bit command to
+ * a 512-bit-aligned special MMIO region that supports the instruction.
+ * A return status is loaded into the ZF flag in the RFLAGS register.
+ * ZF = 0 equates to success, and ZF = 1 indicates retry or error.
+ *
+ * This function issues the ENQCMDS instruction to submit data from
+ * kernel space to MMIO space, in a unit of 512 bits. Order of data access
+ * is not guaranteed, nor is a memory barrier performed afterwards. It
+ * returns 0 on success and -EAGAIN on failure.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the
+ * ENQCMDS instruction is supported on the platform and the device accepts
+ * ENQCMDS.
+ */
+static inline int enqcmds(void __iomem *dst, const void *src)
+{
+	const struct { char _[64]; } *__src = src;
+	struct { char _[64]; } *__dst = dst;
+	int zf;
+
+	/*
+	 * ENQCMDS %(rdx), rax
+	 *
+	 * See movdir64b()'s comment on operand specification.
+	 */
+	asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90"
+		     CC_SET(z)
+		     : CC_OUT(z) (zf), "+m" (*__dst)
+		     : "m" (*__src), "a" (__dst), "d" (__src));
+
+	/* Submission failure is indicated via EFLAGS.ZF=1 */
+	if (zf)
+		return -EAGAIN;
+
+	return 0;
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_SPECIAL_INSNS_H */

From b3149ffcdb31a8eb854cc442a389ae0b539bf28a Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 7 Oct 2020 18:55:35 +0200
Subject: [PATCH 260/262] x86/mce: Allow for copy_mc_fragile symbol checksum to
 be generated

Add asm/mce.h to asm/asm-prototypes.h so that that asm symbol's checksum
can be generated in order to support CONFIG_MODVERSIONS with it and fix:

  WARNING: modpost: EXPORT symbol "copy_mc_fragile" [vmlinux] version \
	  generation failed, symbol will not be versioned.

For reference see:

  4efca4ed05cb ("kbuild: modversions for EXPORT_SYMBOL() for asm")
  334bb7738764 ("x86/kbuild: enable modversions for symbols exported from asm")

Fixes: ec6347bb4339 ("x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()")
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201007111447.GA23257@zn.tnic
---
 arch/x86/include/asm/asm-prototypes.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 5a42f920613874..51e2bf27cc9b0d 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -5,6 +5,7 @@
 #include <asm/string.h>
 #include <asm/page.h>
 #include <asm/checksum.h>
+#include <asm/mce.h>
 
 #include <asm-generic/asm-prototypes.h>
 

From d13027bb35e089bc1bb9f19c4976decf32a09b97 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 9 Oct 2020 11:24:17 +0100
Subject: [PATCH 261/262] Revert "arm64: initialize per-cpu offsets earlier"

This reverts commit 353e228eb355be5a65a3c0996c774a0f46737fda.

Qian Cai reports that TX2 no longer boots with his .config as it appears
that task_cpu() gets instrumented and used before KASAN has been
initialised.

Although Mark has a proposed fix, let's take the safe option of reverting
this for now and sorting it out properly later.

Link: https://lore.kernel.org/r/711bc57a314d8d646b41307008db2845b7537b3d.camel@redhat.com
Reported-by: Qian Cai <cai@redhat.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpu.h |  2 --
 arch/arm64/kernel/head.S     |  3 ---
 arch/arm64/kernel/setup.c    | 12 ++++++------
 arch/arm64/kernel/smp.c      | 13 +++++--------
 4 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index d9d60b18e8116f..7faae6ff3ab4d5 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -68,6 +68,4 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info);
 void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
 				 struct cpuinfo_arm64 *boot);
 
-void init_this_cpu_offset(void);
-
 #endif /* __ASM_CPU_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index e28c9d4e5278cb..d8d9caf02834e0 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -448,8 +448,6 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 	bl	__pi_memset
 	dsb	ishst				// Make zero page visible to PTW
 
-	bl	init_this_cpu_offset
-
 #ifdef CONFIG_KASAN
 	bl	kasan_early_init
 #endif
@@ -756,7 +754,6 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	ptrauth_keys_init_cpu x2, x3, x4, x5
 #endif
 
-	bl	init_this_cpu_offset
 	b	secondary_start_kernel
 SYM_FUNC_END(__secondary_switched)
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 005171972764b6..77c4c9bad1b852 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -87,6 +87,12 @@ void __init smp_setup_processor_id(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	set_cpu_logical_map(0, mpidr);
 
+	/*
+	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
+	 * using percpu variable early, for example, lockdep will
+	 * access percpu variable inside lock_release
+	 */
+	set_my_cpu_offset(0);
 	pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
 		(unsigned long)mpidr, read_cpuid_id());
 }
@@ -276,12 +282,6 @@ u64 cpu_logical_map(int cpu)
 }
 EXPORT_SYMBOL_GPL(cpu_logical_map);
 
-void noinstr init_this_cpu_offset(void)
-{
-	unsigned int cpu = task_cpu(current);
-	set_my_cpu_offset(per_cpu_offset(cpu));
-}
-
 void __init __no_sanitize_address setup_arch(char **cmdline_p)
 {
 	init_mm.start_code = (unsigned long) _text;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 7714310fba2264..355ee9eed4dded 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -192,7 +192,10 @@ asmlinkage notrace void secondary_start_kernel(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	struct mm_struct *mm = &init_mm;
 	const struct cpu_operations *ops;
-	unsigned int cpu = smp_processor_id();
+	unsigned int cpu;
+
+	cpu = task_cpu(current);
+	set_my_cpu_offset(per_cpu_offset(cpu));
 
 	/*
 	 * All kernel threads share the same mm context; grab a
@@ -432,13 +435,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void __init smp_prepare_boot_cpu(void)
 {
-	/*
-	 * Now that setup_per_cpu_areas() has allocated the runtime per-cpu
-	 * areas it is only safe to read the CPU0 boot-time area, and we must
-	 * reinitialize the offset to point to the runtime area.
-	 */
-	init_this_cpu_offset();
-
+	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 	cpuinfo_store_boot_cpu();
 
 	/*

From b4210eab9164e3ea647b71e1049391177db9b215 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Fri, 9 Oct 2020 17:18:03 +0000
Subject: [PATCH 262/262] EDAC/amd64: Set proper family type for Family 19h
 Models 20h-2Fh

AMD Family 19h Models 20h-2Fh use the same PCI IDs as Family 17h Models
70h-7Fh. The same family ops and number of channels also apply.

Use the Family17h Model 70h family_type and ops for Family 19h Models
20h-2Fh. Update the controller name to match the system.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201009171803.3214354-1-Yazen.Ghannam@amd.com
---
 drivers/edac/amd64_edac.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 6262f6370c5dea..1c29cd0055bb96 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -3385,6 +3385,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
 		break;
 
 	case 0x19:
+		if (pvt->model >= 0x20 && pvt->model <= 0x2f) {
+			fam_type = &family_types[F17_M70H_CPUS];
+			pvt->ops = &family_types[F17_M70H_CPUS].ops;
+			fam_type->ctl_name = "F19h_M20h";
+			break;
+		}
 		fam_type	= &family_types[F19_CPUS];
 		pvt->ops	= &family_types[F19_CPUS].ops;
 		family_types[F19_CPUS].ctl_name = "F19h";