Skip to content

Commit

Permalink
drm/amdgpu: Modify xgmi block to fit for the unified ras block data a…
Browse files Browse the repository at this point in the history
…nd ops

1. Modify the xgmi block to fit the unified ras block data and ops.
2. Rename amdgpu_xgmi_ras_funcs to amdgpu_xgmi_ras, and drop the _funcs suffix from the corresponding variable names.
3. Remove the const qualifier from the xgmi ras variable so that the xgmi ras block can be inserted into the amdgpu device ras block linked list.
4. Invoke amdgpu_ras_register_ras_block to register the xgmi ras block into the amdgpu device ras block linked list.
5. Remove the redundant xgmi code in amdgpu_ras.c now that the unified ras block is used.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
yipechai authored and alexdeucher committed Jan 14, 2022
1 parent 8b0fb0e commit 6c24538
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 30 deletions.
16 changes: 8 additions & 8 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
Expand Up @@ -454,12 +454,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}

if (!adev->gmc.xgmi.connected_to_cpu)
adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs;
if (!adev->gmc.xgmi.connected_to_cpu) {
adev->gmc.xgmi.ras = &xgmi_ras;
amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);
}

if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_late_init) {
r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
if (r)
return r;
}
Expand Down Expand Up @@ -505,9 +506,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
adev->mmhub.ras_funcs->ras_fini)
adev->mmhub.ras_funcs->ras_fini(adev);

if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_fini)
adev->gmc.xgmi.ras_funcs->ras_fini(adev);
if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
adev->gmc.xgmi.ras->ras_block.ras_fini(adev);

if (adev->hdp.ras_funcs &&
adev->hdp.ras_funcs->ras_fini)
Expand Down
11 changes: 4 additions & 7 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
Expand Up @@ -29,6 +29,7 @@
#include <linux/types.h>

#include "amdgpu_irq.h"
#include "amdgpu_ras.h"

/* VA hole for 48bit addresses on Vega10 */
#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL
Expand Down Expand Up @@ -135,12 +136,8 @@ struct amdgpu_gmc_funcs {
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
};

struct amdgpu_xgmi_ras_funcs {
int (*ras_late_init)(struct amdgpu_device *adev);
void (*ras_fini)(struct amdgpu_device *adev);
int (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
void (*reset_ras_error_count)(struct amdgpu_device *adev);
/*
 * XGMI RAS descriptor.
 *
 * Replaces the former amdgpu_xgmi_ras_funcs callback table: the XGMI RAS
 * callbacks are now reached through the embedded generic RAS block object
 * (ras_block.ras_late_init, ras_block.ras_fini, ras_block.hw_ops->...).
 */
struct amdgpu_xgmi_ras {
/* Generic RAS block object; this is what gets registered with the device. */
struct amdgpu_ras_block_object ras_block;
};

struct amdgpu_xgmi {
Expand All @@ -159,7 +156,7 @@ struct amdgpu_xgmi {
struct ras_common_if *ras_if;
bool connected_to_cpu;
bool pending_reset;
const struct amdgpu_xgmi_ras_funcs *ras_funcs;
struct amdgpu_xgmi_ras *ras;
};

struct amdgpu_gmc {
Expand Down
10 changes: 7 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Expand Up @@ -1012,9 +1012,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data);
break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL:
if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->query_ras_error_count)
adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
if (!block_obj || !block_obj->hw_ops) {
dev_info(adev->dev, "%s doesn't config ras function \n",
get_ras_block_str(&info->head));
return -EINVAL;
}
if (block_obj->hw_ops->query_ras_error_count)
block_obj->hw_ops->query_ras_error_count(adev, &err_data);
break;
case AMDGPU_RAS_BLOCK__HDP:
if (adev->hdp.ras_funcs &&
Expand Down
26 changes: 16 additions & 10 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
Expand Up @@ -732,7 +732,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
return psp_xgmi_terminate(&adev->psp);
}

static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, void *ras_info)
{
int r;
struct ras_ih_if ih_info = {
Expand All @@ -746,7 +746,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
adev->gmc.xgmi.num_physical_nodes == 0)
return 0;

adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);

if (!adev->gmc.xgmi.ras_if) {
adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
Expand Down Expand Up @@ -865,7 +865,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
return 0;
}

static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
Expand All @@ -874,7 +874,7 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
uint32_t ue_cnt = 0, ce_cnt = 0;

if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
return -EINVAL;
return ;

err_data->ue_count = 0;
err_data->ce_count = 0;
Expand Down Expand Up @@ -940,17 +940,23 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
break;
}

adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);

err_data->ue_count += ue_cnt;
err_data->ce_count += ce_cnt;

return 0;
}

const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = {
.ras_late_init = amdgpu_xgmi_ras_late_init,
.ras_fini = amdgpu_xgmi_ras_fini,
/*
 * Hardware-level RAS operations for XGMI: querying the error counts and
 * resetting them. Referenced from xgmi_ras.ras_block.hw_ops.
 */
struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
/* Fills the ue_count/ce_count of the ras_err_data passed in. */
.query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
/* Also invoked from late init and at the end of each error-count query. */
.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
};

/*
 * The XGMI RAS block instance.
 *
 * Deliberately not const: amdgpu_gmc_ras_late_init() stores its address in
 * adev->gmc.xgmi.ras and hands &ras_block to amdgpu_ras_register_ras_block().
 * NOTE(review): registration presumably links the object into a per-device
 * list, which requires it to be writable -- confirm in amdgpu_ras.c.
 */
struct amdgpu_xgmi_ras xgmi_ras = {
.ras_block = {
/* Human-readable block name. */
.name = "xgmi",
/* RAS block identifier this object serves. */
.block = AMDGPU_RAS_BLOCK__XGMI_WAFL,
/* Error-count query/reset callbacks. */
.hw_ops = &xgmi_ras_hw_ops,
/* Lifecycle hooks called from amdgpu_gmc_ras_late_init()/amdgpu_gmc_ras_fini(). */
.ras_late_init = amdgpu_xgmi_ras_late_init,
.ras_fini = amdgpu_xgmi_ras_fini,
},
};
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
Expand Up @@ -24,7 +24,7 @@

#include <drm/task_barrier.h>
#include "amdgpu_psp.h"

#include "amdgpu_ras.h"

struct amdgpu_hive_info {
struct kobject kobj;
Expand All @@ -50,7 +50,7 @@ struct amdgpu_pcs_ras_field {
uint32_t pcs_err_shift;
};

extern const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs;
extern struct amdgpu_xgmi_ras xgmi_ras;
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
Expand Down

0 comments on commit 6c24538

Please sign in to comment.