Skip to content

Commit

Permalink
Add purging CPU L2 and L3 caches into NPU hreset.
Browse files Browse the repository at this point in the history
If a GPU is passed through to a guest and the guest unexpectedly terminates,
there can be cache lines in CPUs that belong to the GPU. So purge the caches
as part of the reset sequence. L1 is write through, so doesn't need to be purged.

The sequence to purge the L2 and L3 caches from the hw team:

"L2 purge:
 (1) initiate purge
 putspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_TYPE L2CAC_FLUSH -all
 putspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_TRIGGER ON -all

 (2) check this is off in all caches to know purge completed
 getspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_REG_BUSY -all

 (3) putspy pu.ex EXP.L2.L2MISC.L2CERRS.PRD_PURGE_CMD_TRIGGER OFF -all

L3 purge:
 1) Start the purge:
 putspy pu.ex EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_TTYPE FULL_PURGE -all
 putspy pu.ex EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_REQ ON -all

 2) Ensure that the purge has completed by checking the status bit:
 getspy pu.ex EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_REQ -all

 You should see it say OFF if it's done:
 p9n.ex k0:n0:s0:p00:c0
 EXP.L3.L3_MISC.L3CERRS.L3_PRD_PURGE_REQ
 OFF"

Suggested-by: Alistair Popple <alistair@popple.id.au>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Rashmica Gupta <rashmica.g@gmail.com>
Signed-off-by: Stewart Smith <stewart@linux.ibm.com>
  • Loading branch information
RashmicaG authored and stewartsmith committed Dec 11, 2018
1 parent 8340a96 commit b2e120f
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 1 deletion.
139 changes: 138 additions & 1 deletion hw/npu2.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,13 +326,146 @@ static int64_t npu2_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
return npu2_cfg_read_bar(ndev, pcrf, offset, len, data);
}

/*
 * Kick off an L2 cache purge on one core.
 *
 * Follows the first step of the hardware team's sequence: select the
 * L2CAC_FLUSH command type in L2_PRD_PURGE_CMD_REG, then raise the purge
 * trigger bit in the same register. Completion is not awaited here.
 *
 * chip_id: chip passed through to the XSCOM accessors.
 * core_id: core whose purge register address is derived via
 *          XSCOM_ADDR_P9_EX().
 *
 * Returns the XSCOM status of the last write attempted (zero when both
 * writes succeeded).
 */
static int start_l2_purge(uint32_t chip_id, uint32_t core_id)
{
	const uint64_t purge_reg = XSCOM_ADDR_P9_EX(core_id,
						    L2_PRD_PURGE_CMD_REG);
	int err;

	/* Program the command type before arming the trigger */
	err = xscom_write_mask(chip_id, purge_reg, L2CAC_FLUSH,
			       L2_PRD_PURGE_CMD_TYPE_MASK);
	if (err == 0) {
		err = xscom_write_mask(chip_id, purge_reg,
				       L2_PRD_PURGE_CMD_TRIGGER,
				       L2_PRD_PURGE_CMD_TRIGGER);
	}

	/* One message covers a failure of either write */
	if (err != 0) {
		prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write_mask "
		      "failed %i\n", core_id, err);
	}

	return err;
}

/*
 * Wait for a previously started L2 purge on one core to finish, then clear
 * the purge trigger.
 *
 * Polls L2_PRD_PURGE_CMD_REG until L2_PRD_PURGE_CMD_REG_BUSY reads as zero,
 * giving up once the timebase passes a deadline of msecs_to_tb(2) from
 * entry. The trigger bit is not cleared for us on completion, so the last
 * value read is written back with the trigger bit masked off.
 *
 * chip_id: chip passed through to the XSCOM accessors.
 * core_id: core whose purge register address is derived via
 *          XSCOM_ADDR_P9_EX().
 *
 * Returns the XSCOM status of the final write on completion, OPAL_BUSY if
 * the purge was still busy at the deadline, or the XSCOM error code if the
 * status register could not be read.
 */
static int wait_l2_purge(uint32_t chip_id, uint32_t core_id)
{
	uint64_t val;
	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
	unsigned long now = mftb();
	unsigned long end = now + msecs_to_tb(2);
	int rc;

	while (1) {
		rc = xscom_read(chip_id, addr, &val);
		if (rc) {
			prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM read "
			      "failed %i\n", core_id, rc);
			/*
			 * val holds nothing meaningful after a failed read.
			 * Bail out now instead of falling through to the
			 * write-back below, which would push garbage into
			 * the purge register and replace this error code
			 * with the status of that write.
			 */
			return rc;
		}
		if (!(val & L2_PRD_PURGE_CMD_REG_BUSY))
			break;
		now = mftb();
		if (tb_compare(now, end) == TB_AAFTERB) {
			prlog(PR_ERR, "PURGE L2 on core 0x%x timed out %i\n",
			      core_id, rc);
			return OPAL_BUSY;
		}
	}

	/* We have to clear the trigger bit ourselves */
	val &= ~L2_PRD_PURGE_CMD_TRIGGER;
	rc = xscom_write(chip_id, addr, val);
	if (rc)
		prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write failed %i\n",
		      core_id, rc);
	return rc;
}

/*
 * Kick off an L3 cache purge on one core.
 *
 * Follows the first step of the hardware team's sequence: select the
 * L3_FULL_PURGE transaction type in L3_PRD_PURGE_REG, then raise the purge
 * request bit in the same register. Completion is not awaited here.
 *
 * chip_id: chip passed through to the XSCOM accessors.
 * core_id: core whose purge register address is derived via
 *          XSCOM_ADDR_P9_EX().
 *
 * Returns the XSCOM status of the last write attempted (zero when both
 * writes succeeded).
 */
static int start_l3_purge(uint32_t chip_id, uint32_t core_id)
{
	const uint64_t purge_reg = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
	int err;

	/* Program the purge type before raising the request */
	err = xscom_write_mask(chip_id, purge_reg, L3_FULL_PURGE,
			       L3_PRD_PURGE_TTYPE_MASK);
	if (err == 0) {
		err = xscom_write_mask(chip_id, purge_reg, L3_PRD_PURGE_REQ,
				       L3_PRD_PURGE_REQ);
	}

	/* One message covers a failure of either write */
	if (err != 0) {
		prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM write_mask "
		      "failed %i\n", core_id, err);
	}

	return err;
}

/*
 * Wait for a previously requested L3 purge on one core to finish.
 *
 * Polls L3_PRD_PURGE_REG until the L3_PRD_PURGE_REQ bit reads as zero,
 * giving up once the timebase passes a deadline of msecs_to_tb(2) from
 * entry. Unlike the L2 case nothing is written back: the request bit
 * returns to zero on its own when the flush is done.
 *
 * chip_id: chip passed through to the XSCOM accessors.
 * core_id: core whose purge register address is derived via
 *          XSCOM_ADDR_P9_EX().
 *
 * Returns zero once the request bit has cleared, OPAL_BUSY if it was still
 * set at the deadline, or the XSCOM error code if the register could not
 * be read.
 */
static int wait_l3_purge(uint32_t chip_id, uint32_t core_id)
{
	const uint64_t purge_reg = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
	unsigned long start = mftb();
	unsigned long deadline = start + msecs_to_tb(2);
	uint64_t status;
	int err;

	for (;;) {
		err = xscom_read(chip_id, purge_reg, &status);
		if (err) {
			prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM read "
			      "failed %i\n", core_id, err);
			return err;
		}

		/* Request bit dropped: the purge has completed */
		if ((status & L3_PRD_PURGE_REQ) == 0)
			return 0;

		if (tb_compare(mftb(), deadline) == TB_AAFTERB) {
			prlog(PR_ERR, "PURGE L3 on core 0x%x timed out %i\n",
			      core_id, err);
			return OPAL_BUSY;
		}
	}
}

/*
 * Purge the L2 and L3 caches of every unguarded core.
 *
 * Works in two passes over for_each_ungarded_cpu(): the first starts the L2
 * and L3 purges on every core, the second waits for each of them to
 * complete, so the purges on different cores can overlap instead of being
 * serialised.
 *
 * Several threads map to the same core id, so a core is only handled when
 * it differs from the one handled on the previous iteration. The chip id is
 * part of that comparison: the helpers address a core by (chip_id, core_id),
 * so the same core id on a different chip is a different core and must not
 * be skipped.
 * NOTE(review): this relies on threads of one core being visited
 * consecutively, as the original code did - confirm against the iterator.
 *
 * Returns OPAL_SUCCESS when every purge completed, otherwise the first
 * error reported by a start/wait helper. On error the remaining cores are
 * not processed.
 */
static int64_t purge_l2_l3_caches(void)
{
	struct cpu_thread *t;
	uint64_t core_id, prev_core_id = (uint64_t)-1;
	/* Initial value is irrelevant: the core id sentinel never matches */
	uint32_t prev_chip_id = 0;
	int rc;

	for_each_ungarded_cpu(t) {
		/* Only need to do it once per core chiplet */
		core_id = pir_to_core_id(t->pir);
		if (prev_chip_id == t->chip_id && prev_core_id == core_id)
			continue;
		prev_chip_id = t->chip_id;
		prev_core_id = core_id;
		rc = start_l2_purge(t->chip_id, core_id);
		if (rc)
			return rc;
		rc = start_l3_purge(t->chip_id, core_id);
		if (rc)
			return rc;
	}

	prev_core_id = (uint64_t)-1;
	for_each_ungarded_cpu(t) {
		/* Only need to do it once per core chiplet */
		core_id = pir_to_core_id(t->pir);
		if (prev_chip_id == t->chip_id && prev_core_id == core_id)
			continue;
		prev_chip_id = t->chip_id;
		prev_core_id = core_id;

		rc = wait_l2_purge(t->chip_id, core_id);
		if (rc)
			return rc;
		rc = wait_l3_purge(t->chip_id, core_id);
		if (rc)
			return rc;
	}
	return OPAL_SUCCESS;
}

static int64_t npu2_dev_cfg_exp_devcap(void *dev,
struct pci_cfg_reg_filter *pcrf __unused,
uint32_t offset, uint32_t size,
uint32_t *data, bool write)
{
struct pci_virt_device *pvd = dev;
struct npu2_dev *ndev = pvd->data;
int rc;

assert(write);

Expand All @@ -346,6 +479,10 @@ static int64_t npu2_dev_cfg_exp_devcap(void *dev,
if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET)
npu2_dev_procedure_reset(ndev);

rc = purge_l2_l3_caches();
if (rc)
return rc;

return OPAL_PARTIAL;
}

Expand Down Expand Up @@ -1125,7 +1262,7 @@ static int64_t npu2_hreset(struct pci_slot *slot __unused)
reset_ntl(ndev);
}
}
return OPAL_SUCCESS;
return purge_l2_l3_caches();
}

static int64_t npu2_freset(struct pci_slot *slot __unused)
Expand Down
11 changes: 11 additions & 0 deletions include/npu2-regs.h
Original file line number Diff line number Diff line change
Expand Up @@ -758,4 +758,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define OB3_ODL0_ENDPOINT_INFO 0xC010832
#define OB3_ODL1_ENDPOINT_INFO 0xC010833

/*
 * Registers and bits used to clear the L2 and L3 cache.
 *
 * The *_REG values are register offsets fed to XSCOM_ADDR_P9_EX(); the rest
 * are bit fields within those registers. Multi-bit masks are wrapped in
 * parentheses so they stay a single operand when combined with ~, & or ==.
 */
#define L2_PRD_PURGE_CMD_REG 0x1080E
#define L2_PRD_PURGE_CMD_REG_BUSY 0x0040000000000000
#define L2_PRD_PURGE_CMD_TYPE_MASK (PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3) | PPC_BIT(4))
#define L2_PRD_PURGE_CMD_TRIGGER PPC_BIT(0)
#define L2CAC_FLUSH 0x0
#define L3_PRD_PURGE_REG 0x1180E
#define L3_PRD_PURGE_REQ PPC_BIT(0)
#define L3_PRD_PURGE_TTYPE_MASK (PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3) | PPC_BIT(4))
#define L3_FULL_PURGE 0x0

#endif /* __NPU2_REGS_H */

0 comments on commit b2e120f

Please sign in to comment.