Skip to content

Commit

Permalink
Revert "NPU2 HMIs: dump out a *LOT* of npu2 registers for debugging"
Browse files Browse the repository at this point in the history
This reverts commit fbdc91e.

We don't need this, as we need to do it a different way: with an explicit
set of registers, as otherwise we trip other random FIR bits and everything
becomes even more terrible.

I suggest alcohol.

Cc: stable
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
  • Loading branch information
stewartsmith committed Mar 27, 2018
1 parent 351b05b commit 80452d2
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 69 deletions.
38 changes: 1 addition & 37 deletions core/hmi.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2013-2018 IBM Corp.
/* Copyright 2013-2014 IBM Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,7 +29,6 @@
#include <npu2-regs.h>
#include <npu.h>
#include <capp.h>
#include <nvram.h>

/*
* HMER register layout:
Expand Down Expand Up @@ -585,10 +584,7 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
uint64_t npu2_fir_action0_addr;
uint64_t npu2_fir_action1_addr;
uint64_t fatal_errors;
uint64_t npu_scom_dump[2];
bool npu2_hmi_verbose;
int total_errors = 0;
uint64_t r;

/* Find the NPU on the chip associated with the HMI. */
for_each_phb(phb) {
Expand Down Expand Up @@ -640,38 +636,6 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
if (!total_errors)
return;

npu2_hmi_verbose = nvram_query_eq("npu2-hmi-verbose", "true");
/* Force this for now until we sort out something better */
npu2_hmi_verbose = true;

if (npu2_hmi_verbose) {
_xscom_lock();
for (r = NPU2_DEBUG_REG_START; r < NPU2_DEBUG_REG_END; r++) {
npu_scom_dump[0] = npu_scom_dump[1] = 0;
_xscom_read(flat_chip_id, r++, &npu_scom_dump[0], false, true);
_xscom_read(flat_chip_id, r, &npu_scom_dump[1], false, true);
prlog(PR_ERR, "NPU: 0x%016llx=0x%016llx 0x%016llx=0x%016llx\n",
r-1, npu_scom_dump[0],
r, npu_scom_dump[1]);
}
for (r = NPU2_FIR_REGISTER_0; r < NPU2_FIR_REGISTER_END; r++) {
npu_scom_dump[0] = npu_scom_dump[1] = 0;
_xscom_read(flat_chip_id, r++, &npu_scom_dump[0], false, true);
_xscom_read(flat_chip_id, r, &npu_scom_dump[1], false, true);
prlog(PR_ERR, "NPU: 0x%016llx=0x%016llx 0x%016llx=0x%016llx\n",
r-1, npu_scom_dump[0],
r, npu_scom_dump[1]);
}
_xscom_unlock();
prlog(PR_ERR, " _________________________ \n");
prlog(PR_ERR, "< It's Driver Debug time! >\n");
prlog(PR_ERR, " ------------------------- \n");
prlog(PR_ERR, " \\ ,__, \n");
prlog(PR_ERR, " \\ (oo)____ \n");
prlog(PR_ERR, " (__) )\\ \n");
prlog(PR_ERR, " ||--|| * \n");
}

/* Set up the HMI event */
hmi_evt->severity = OpalHMI_SEV_WARNING;
hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT;
Expand Down
4 changes: 2 additions & 2 deletions hw/slw.c
Original file line number Diff line number Diff line change
Expand Up @@ -1620,7 +1620,7 @@ void slw_update_timer_expiry(uint64_t new_target)
/* Grab generation and spin if odd */
_xscom_lock();
for (;;) {
rc = _xscom_read(slw_timer_chip, 0xE0006, &gen, false, false);
rc = _xscom_read(slw_timer_chip, 0xE0006, &gen, false);
if (rc) {
prerror("SLW: Error %lld reading tmr gen "
" count\n", rc);
Expand Down Expand Up @@ -1664,7 +1664,7 @@ void slw_update_timer_expiry(uint64_t new_target)
}

/* Re-check gen count */
rc = _xscom_read(slw_timer_chip, 0xE0006, &gen2, false, false);
rc = _xscom_read(slw_timer_chip, 0xE0006, &gen2, false);
if (rc) {
prerror("SLW: Error %lld re-reading tmr gen "
" count\n", rc);
Expand Down
36 changes: 14 additions & 22 deletions hw/xscom.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,8 @@ static int xscom_clear_error(uint32_t gcid, uint32_t pcb_addr)
}

static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_addr,
bool is_write, int64_t retries,
int64_t *xscom_clear_retries,
bool ignore_error)
bool is_write, int64_t retries,
int64_t *xscom_clear_retries)
{
unsigned int stat = GETFIELD(SPR_HMER_XSCOM_STATUS, hmer);
int64_t rc = OPAL_HARDWARE;
Expand Down Expand Up @@ -278,12 +277,9 @@ static int64_t xscom_handle_error(uint64_t hmer, uint32_t gcid, uint32_t pcb_add
}

/* XXX: Create error log entry ? */
if (!ignore_error)
log_simple_error(&e_info(OPAL_RC_XSCOM_RW),
"XSCOM: %s error gcid=0x%x "
"pcb_addr=0x%x stat=0x%x\n",
is_write ? "write" : "read", gcid,
pcb_addr, stat);
log_simple_error(&e_info(OPAL_RC_XSCOM_RW),
"XSCOM: %s error gcid=0x%x pcb_addr=0x%x stat=0x%x\n",
is_write ? "write" : "read", gcid, pcb_addr, stat);

/* We need to reset the XSCOM or we'll hang on the next access */
xscom_reset(gcid, false);
Expand Down Expand Up @@ -326,16 +322,14 @@ static inline bool xscom_is_multicast_addr(uint32_t addr)
* Low level XSCOM access functions, perform a single direct xscom
* access via MMIO
*/
static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val,
bool ignore_error)
static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val)
{
uint64_t hmer;
int64_t ret, retries;
int64_t xscom_clear_retries = XSCOM_CLEAR_MAX_RETRIES;

if (!xscom_gcid_ok(gcid)) {
if (!ignore_error)
prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
prerror("%s: invalid XSCOM gcid 0x%x\n", __func__, gcid);
return OPAL_PARAMETER;
}

Expand All @@ -357,7 +351,7 @@ static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val,

/* Handle error and possibly eventually retry */
ret = xscom_handle_error(hmer, gcid, pcb_addr, false, retries,
&xscom_clear_retries, ignore_error);
&xscom_clear_retries);
if (ret != OPAL_BUSY)
break;
}
Expand All @@ -376,8 +370,7 @@ static int __xscom_read(uint32_t gcid, uint32_t pcb_addr, uint64_t *val,
if (proc_gen == proc_gen_p9 && ret == OPAL_XSCOM_CHIPLET_OFF)
return ret;

if (!ignore_error)
prerror("XSCOM: Read failed, ret = %lld\n", ret);
prerror("XSCOM: Read failed, ret = %lld\n", ret);
return ret;
}

Expand Down Expand Up @@ -410,7 +403,7 @@ static int __xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val)

/* Handle error and possibly eventually retry */
ret = xscom_handle_error(hmer, gcid, pcb_addr, true, retries,
&xscom_clear_retries, false);
&xscom_clear_retries);
if (ret != OPAL_BUSY)
break;
}
Expand Down Expand Up @@ -458,7 +451,7 @@ static int xscom_indirect_read_form0(uint32_t gcid, uint64_t pcb_addr,

/* Wait for completion */
for (retries = 0; retries < XSCOM_IND_MAX_RETRIES; retries++) {
rc = __xscom_read(gcid, addr, &data, false);
rc = __xscom_read(gcid, addr, &data);
if (rc)
goto bail;
if ((data & XSCOM_DATA_IND_COMPLETE) &&
Expand Down Expand Up @@ -520,7 +513,7 @@ static int xscom_indirect_write_form0(uint32_t gcid, uint64_t pcb_addr,

/* Wait for completion */
for (retries = 0; retries < XSCOM_IND_MAX_RETRIES; retries++) {
rc = __xscom_read(gcid, addr, &data, false);
rc = __xscom_read(gcid, addr, &data);
if (rc)
goto bail;
if ((data & XSCOM_DATA_IND_COMPLETE) &&
Expand Down Expand Up @@ -595,8 +588,7 @@ void _xscom_unlock(void)
/*
* External API
*/
int _xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val,
bool take_lock, bool ignore_error)
int _xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val, bool take_lock)
{
uint32_t gcid;
int rc;
Expand Down Expand Up @@ -643,7 +635,7 @@ int _xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val,
if (pcb_addr & XSCOM_ADDR_IND_FLAG)
rc = xscom_indirect_read(gcid, pcb_addr, val);
else
rc = __xscom_read(gcid, pcb_addr & 0x7fffffff, val, ignore_error);
rc = __xscom_read(gcid, pcb_addr & 0x7fffffff, val);

/* Unlock it */
if (take_lock)
Expand Down
7 changes: 1 addition & 6 deletions include/npu2-regs.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2013-2018 IBM Corp.
/* Copyright 2013-2016 IBM Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,10 +29,6 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
uint64_t reg, uint64_t size,
uint64_t val);

/* SCOM Registers to dump on HMI to aid in debugging */
#define NPU2_DEBUG_REG_START 0x5011000
#define NPU2_DEBUG_REG_END 0x50110FF

/* These aren't really NPU specific registers but we initialise them in NPU
* code */
#define MCD0_BANK0_CN3 0x301100d
Expand Down Expand Up @@ -576,7 +572,6 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
#define NPU2_FIR_REGISTER_0 0x0000000005013C00
#define NPU2_FIR_REGISTER_1 0x0000000005013C40
#define NPU2_FIR_REGISTER_2 0x0000000005013C80
#define NPU2_FIR_REGISTER_END 0x0000000005013CFF

#define NPU2_TOTAL_FIR_REGISTERS 3

Expand Down
4 changes: 2 additions & 2 deletions include/xscom.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,15 +225,15 @@

/* Use only in select places where multiple SCOMs are time/latency sensitive */
extern void _xscom_lock(void);
extern int _xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val, bool take_lock, bool ignore_error);
extern int _xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val, bool take_lock);
extern int _xscom_write(uint32_t partid, uint64_t pcb_addr, uint64_t val, bool take_lock);
extern void _xscom_unlock(void);


/* Targeted SCOM access */
static inline int xscom_read(uint32_t partid, uint64_t pcb_addr, uint64_t *val)
{
return _xscom_read(partid, pcb_addr, val, true, false);
return _xscom_read(partid, pcb_addr, val, true);
}
static inline int xscom_write(uint32_t partid, uint64_t pcb_addr, uint64_t val) {
return _xscom_write(partid, pcb_addr, val, true);
Expand Down

0 comments on commit 80452d2

Please sign in to comment.