Skip to content

Commit

Permalink
drm/amdgpu: revise RLCG access path
Browse files Browse the repository at this point in the history
what changed:
1) provide a new implementation interface for the RLCG access path
2) route SQ_CMD/SQ_IND_INDEX through the GFX9 RLCG path so that debugfs's
reg_op function can access registers that need the RLCG path

Now even debugfs's reg_op can be used to dump waves.

tested-by: Monk Liu <monk.liu@amd.com>
tested-by: Zhou pengju <pengju.zhou@amd.com>
Signed-off-by: Zhou pengju <pengju.zhou@amd.com>
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Emily Deng <Emily.Deng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
ljymonk authored and alexdeucher committed Mar 16, 2020
1 parent a7fbb63 commit 2e0cc4d
Show file tree
Hide file tree
Showing 9 changed files with 223 additions and 21 deletions.
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
uint32_t acc_flags);
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
} else {
r = get_user(value, (uint32_t *)buf);
if (!r)
WREG32(*pos >> 2, value);
amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 0);
}
if (r) {
result = r;
Expand Down
50 changes: 37 additions & 13 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,26 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
BUG();
}

/**
 * amdgpu_mm_wreg_mmio - write a register through the MMIO mapping
 *
 * @adev: amdgpu device
 * @reg: register offset in dwords (multiplied by 4 to form the byte offset)
 * @v: 32-bit value to write
 * @acc_flags: access flags; AMDGPU_REGS_IDX forces the indexed path
 *
 * Writes @v directly at byte offset @reg * 4 when that offset is below
 * adev->rmmio_size and AMDGPU_REGS_IDX is not set.  Otherwise the byte offset
 * is written to mmMM_INDEX and the value to mmMM_DATA while holding
 * adev->mmio_idx_lock with interrupts disabled.
 */
static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev, uint32_t reg,
				       uint32_t v, uint32_t acc_flags)
{
	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	} else {
		unsigned long flags;

		/* The index/data pair is shared, so the two writes must not interleave. */
		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
		writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	}

	/*
	 * NOTE(review): a write to register 1 while last_mm_index is 0x5702C is
	 * followed by a 500us delay on VEGA10 and newer; the hardware reason is
	 * not visible here -- confirm before changing.
	 */
	if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C)
		udelay(500);
}

/**
* amdgpu_mm_wreg - write to a memory mapped IO register
*
Expand All @@ -319,29 +339,33 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags)
{
trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
adev->last_mm_index = v;
}

if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
return amdgpu_kiq_wreg(adev, reg, v);

if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
else {
unsigned long flags;
amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
}

spin_lock_irqsave(&adev->mmio_idx_lock, flags);
writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
writel(v, ((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
}
/*
* amdgpu_mm_wreg_mmio_rlc - write register either with mmio or with RLC path if in range
*
* this function is invoked only for debugfs register access
* */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags)
{
if (amdgpu_sriov_fullaccess(adev) &&
adev->gfx.rlc.funcs &&
adev->gfx.rlc.funcs->is_rlcg_access_range) {

if (adev->asic_type >= CHIP_VEGA10 && reg == 1 && adev->last_mm_index == 0x5702C) {
udelay(500);
if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
return adev->gfx.rlc.funcs->rlcg_wreg(adev, reg, v);
}

amdgpu_mm_wreg_mmio(adev, reg, v, acc_flags);
}

/**
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ struct amdgpu_rlc_funcs {
void (*reset)(struct amdgpu_device *adev);
void (*start)(struct amdgpu_device *adev);
void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid);
void (*rlcg_wreg)(struct amdgpu_device *adev, u32 offset, u32 v);
bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg);
};

struct amdgpu_rlc {
Expand Down
3 changes: 3 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ struct amdgpu_virt {
#define amdgpu_sriov_runtime(adev) \
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_RUNTIME)

/*
 * Non-zero when amdgpu_sriov_vf(adev) holds and the
 * AMDGPU_SRIOV_CAPS_RUNTIME capability (amdgpu_sriov_runtime) is not set.
 */
#define amdgpu_sriov_fullaccess(adev) \
(amdgpu_sriov_vf((adev)) && !amdgpu_sriov_runtime((adev)))

#define amdgpu_passthrough(adev) \
((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE)

Expand Down
74 changes: 73 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,49 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
};

/*
 * gfx_v10_rlcg_wreg - write a register through the RLCG scratch handshake
 *
 * @adev: amdgpu device
 * @offset: register offset passed through SCRATCH_REG1
 * @v: value passed through SCRATCH_REG0
 *
 * Places @v in SCRATCH_REG0 and @offset with bit 31 set in SCRATCH_REG1,
 * writes 1 to RLC_SPARE_INT, then polls SCRATCH_REG1 until bit 31 reads back
 * clear.  Polling gives up after 50000 reads spaced 10us apart (about 0.5s)
 * and logs an error.  Nothing is written when amdgpu_sriov_runtime() is set.
 */
static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
{
	void *scratch_reg0;
	void *scratch_reg1;
	void *spare_int;
	uint32_t i;
	uint32_t retries = 50000;

	if (amdgpu_sriov_runtime(adev)) {
		pr_err("shoudn't call rlcg write register during runtime\n");
		return;
	}

	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

	writel(v, scratch_reg0);
	/* Bit 31 marks the request as pending; the loop below waits for it to clear. */
	writel(offset | 0x80000000, scratch_reg1);
	writel(1, spare_int);

	for (i = 0; i < retries; i++) {
		u32 tmp = readl(scratch_reg1);

		if (!(tmp & 0x80000000))
			break;

		udelay(10);
	}

	if (i >= retries)
		pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
}

static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] =
{
/* Pending on emulation bring up */
Expand Down Expand Up @@ -4247,6 +4290,33 @@ static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
}

/*
 * gfx_v10_0_check_rlcg_range - test whether a register is listed in a table
 *
 * @adev: amdgpu device; adev->reg_offset supplies each entry's base offset
 * @offset: register offset to look up
 * @entries: table to scan, may be NULL
 * @arr_size: number of elements in @entries
 *
 * Each entry is resolved to
 * adev->reg_offset[hwip][instance][segment] + reg and compared with @offset.
 *
 * Returns true on the first match, false when @entries is NULL or nothing
 * matches.  The table is only read, hence the const qualifier.
 */
static bool gfx_v10_0_check_rlcg_range(struct amdgpu_device *adev,
				       uint32_t offset,
				       const struct soc15_reg_rlcg *entries, int arr_size)
{
	int i;

	if (!entries)
		return false;

	for (i = 0; i < arr_size; i++) {
		const struct soc15_reg_rlcg *entry = &entries[i];
		uint32_t reg;

		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
		if (offset == reg)
			return true;
	}

	return false;
}

/*
 * gfx_v10_0_is_rlcg_access_range - rlc_funcs hook reporting whether @offset
 * is in the RLCG register table.  A NULL table of length 0 is passed to the
 * lookup helper.
 */
static bool gfx_v10_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
{
	const int no_entries = 0;

	return gfx_v10_0_check_rlcg_range(adev, offset, NULL, no_entries);
}

static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
.is_rlc_enabled = gfx_v10_0_is_rlc_enabled,
.set_safe_mode = gfx_v10_0_set_safe_mode,
Expand All @@ -4258,7 +4328,9 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs = {
.stop = gfx_v10_0_rlc_stop,
.reset = gfx_v10_0_rlc_reset,
.start = gfx_v10_0_rlc_start,
.update_spm_vmid = gfx_v10_0_update_spm_vmid
.update_spm_vmid = gfx_v10_0_update_spm_vmid,
.rlcg_wreg = gfx_v10_rlcg_wreg,
.is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range,
};

static int gfx_v10_0_set_powergating_state(void *handle,
Expand Down
99 changes: 96 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,11 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
};

/*
 * Register table scanned by gfx_v9_0_is_rlcg_access_range(); an offset that
 * resolves to one of these entries is reported as being in the RLCG range.
 */
static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
{SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
{SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
Expand All @@ -721,6 +726,63 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

/*
 * gfx_v9_0_rlcg_wreg - write a register through the RLCG path
 *
 * @adev: amdgpu device
 * @offset: register offset (dwords)
 * @v: value to write
 *
 * Two cases:
 *  - @offset is GRBM_GFX_CNTL or GRBM_GFX_INDEX: @v is mirrored into
 *    SCRATCH_REG2 or SCRATCH_REG3 respectively and then written directly to
 *    the register through MMIO.
 *  - any other offset: @v goes to SCRATCH_REG0, @offset with bit 31 set goes
 *    to SCRATCH_REG1, 1 is written to RLC_SPARE_INT, and SCRATCH_REG1 is
 *    polled until bit 31 reads back clear (50000 polls, 10us apart, about
 *    0.5s) before an error is logged.
 *
 * Nothing is written when amdgpu_sriov_runtime() is set.
 *
 * NOTE(review): in the visible code this function is only referenced through
 * gfx_v9_0_rlc_funcs; it could probably be static -- confirm there is no
 * external declaration.
 */
void gfx_v9_0_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v)
{
	void *scratch_reg0;
	void *scratch_reg1;
	void *scratch_reg2;
	void *scratch_reg3;
	void *spare_int;
	uint32_t grbm_cntl;
	uint32_t grbm_idx;
	uint32_t i;
	uint32_t retries = 50000;

	if (amdgpu_sriov_runtime(adev)) {
		pr_err("shoudn't call rlcg write register during runtime\n");
		return;
	}

	scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4;
	scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4;
	scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4;
	scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4;
	spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4;

	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;

	/*
	 * Shadowed registers: decide from the offset itself rather than from a
	 * flag that was left uninitialized for every other register.
	 */
	if (offset == grbm_cntl || offset == grbm_idx) {
		if (offset == grbm_cntl)
			writel(v, scratch_reg2);
		else
			writel(v, scratch_reg3);

		writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
		return;
	}

	writel(v, scratch_reg0);
	/* Bit 31 marks the request as pending; the loop below waits for it to clear. */
	writel(offset | 0x80000000, scratch_reg1);
	writel(1, spare_int);

	for (i = 0; i < retries; i++) {
		u32 tmp = readl(scratch_reg1);

		if (!(tmp & 0x80000000))
			break;

		udelay(10);
	}

	if (i >= retries)
		pr_err("timeout: rlcg program reg:0x%05x failed !\n", offset);
}

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
Expand Down Expand Up @@ -1921,7 +1983,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
(address << SQ_IND_INDEX__INDEX__SHIFT) |
Expand All @@ -1933,7 +1995,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
uint32_t wave, uint32_t thread,
uint32_t regno, uint32_t num, uint32_t *out)
{
WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
(regno << SQ_IND_INDEX__INDEX__SHIFT) |
Expand Down Expand Up @@ -4908,6 +4970,35 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
}

/*
 * gfx_v9_0_check_rlcg_range - test whether a register is listed in a table
 *
 * @adev: amdgpu device; adev->reg_offset supplies each entry's base offset
 * @offset: register offset to look up
 * @entries: table to scan, may be NULL
 * @arr_size: number of elements in @entries
 *
 * Each entry is resolved to
 * adev->reg_offset[hwip][instance][segment] + reg and compared with @offset.
 *
 * Returns true on the first match, false when @entries is NULL or nothing
 * matches.  The table is only read, so it is taken as a pointer to const and
 * const tables can be passed without a cast.
 */
static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
				      uint32_t offset,
				      const struct soc15_reg_rlcg *entries, int arr_size)
{
	int i;

	if (!entries)
		return false;

	for (i = 0; i < arr_size; i++) {
		const struct soc15_reg_rlcg *entry = &entries[i];
		uint32_t reg;

		reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
		if (offset == reg)
			return true;
	}

	return false;
}

/*
 * gfx_v9_0_is_rlcg_access_range - rlc_funcs hook reporting whether @offset
 * resolves to one of the registers in rlcg_access_gc_9_0.
 */
static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
{
	return gfx_v9_0_check_rlcg_range(adev, offset,
					 rlcg_access_gc_9_0,
					 ARRAY_SIZE(rlcg_access_gc_9_0));
}

static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
.set_safe_mode = gfx_v9_0_set_safe_mode,
Expand All @@ -4920,7 +5011,9 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
.stop = gfx_v9_0_rlc_stop,
.reset = gfx_v9_0_rlc_reset,
.start = gfx_v9_0_rlc_start,
.update_spm_vmid = gfx_v9_0_update_spm_vmid
.update_spm_vmid = gfx_v9_0_update_spm_vmid,
.rlcg_wreg = gfx_v9_0_rlcg_wreg,
.is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
};

static int gfx_v9_0_set_powergating_state(void *handle,
Expand Down
7 changes: 7 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/soc15.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ struct soc15_reg_golden {
u32 or_mask;
};

/*
 * One entry of an RLCG register table.  The range-check helpers resolve an
 * entry to adev->reg_offset[hwip][instance][segment] + reg.
 */
struct soc15_reg_rlcg {
/* first index into adev->reg_offset */
u32 hwip;
/* second index into adev->reg_offset */
u32 instance;
/* third index into adev->reg_offset */
u32 segment;
/* value added to the looked-up base offset */
u32 reg;
};

struct soc15_reg_entry {
uint32_t hwip;
uint32_t inst;
Expand Down
5 changes: 2 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/soc15_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,9 @@
} \
} while (0)

#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a)))
#define WREG32_RLC(reg, value) \
do { \
if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \
if (amdgpu_sriov_fullaccess(adev)) { \
uint32_t i = 0; \
uint32_t retries = 50000; \
uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \
Expand All @@ -98,7 +97,7 @@
#define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \
do { \
uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\
if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \
if (amdgpu_sriov_fullaccess(adev)) { \
uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \
uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \
uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \
Expand Down

0 comments on commit 2e0cc4d

Please sign in to comment.