Convert the XGMI RAS callbacks from the amdgpu_xgmi_ras_funcs table to a
struct amdgpu_xgmi_ras that embeds a struct amdgpu_ras_block_object.

  * amdgpu_gmc_ras_late_init() points adev->gmc.xgmi.ras at xgmi_ras and
    registers its ras_block when the device is not connected_to_cpu.
  * ras_late_init/ras_fini are now reached through ras->ras_block, and
    ras_late_init takes an extra (here NULL) ras_info argument.
  * The AMDGPU_RAS_BLOCK__XGMI_WAFL case of amdgpu_ras_query_error_status()
    goes through block_obj->hw_ops and returns -EINVAL when block_obj or
    its hw_ops is missing.
  * amdgpu_xgmi_query_ras_error_count() becomes void to fit the hw_ops
    table; the error counters are reset through ras_block.hw_ops.

NOTE(review): this patch text was rebuilt from a whitespace-collapsed copy.
Hunk line counts were re-verified, but tab/space indentation is
reconstructed kernel style and the two angle-bracket include context lines
(<linux/types.h>, <drm/task_barrier.h>) are assumed -- confirm with
"git apply --check" against the target tree.
NOTE(review): two added lines were tidied ("return ;" and a trailing space
before "\n" in the dev_info() text), so the post-image hashes on the
"index" lines of amdgpu_ras.c and amdgpu_xgmi.c are stale.
NOTE(review): the result of amdgpu_ras_register_ras_block() is not checked;
confirm whether it can fail.

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 2430d6223c2d73..d86ee530e0a433 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -454,12 +454,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
 			return r;
 	}
 
-	if (!adev->gmc.xgmi.connected_to_cpu)
-		adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs;
+	if (!adev->gmc.xgmi.connected_to_cpu) {
+		adev->gmc.xgmi.ras = &xgmi_ras;
+		amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block);
+	}
 
-	if (adev->gmc.xgmi.ras_funcs &&
-	    adev->gmc.xgmi.ras_funcs->ras_late_init) {
-		r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
+	if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) {
+		r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL);
 		if (r)
 			return r;
 	}
@@ -505,9 +506,8 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
 	    adev->mmhub.ras_funcs->ras_fini)
 		adev->mmhub.ras_funcs->ras_fini(adev);
 
-	if (adev->gmc.xgmi.ras_funcs &&
-	    adev->gmc.xgmi.ras_funcs->ras_fini)
-		adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+	if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini)
+		adev->gmc.xgmi.ras->ras_block.ras_fini(adev);
 
 	if (adev->hdp.ras_funcs &&
 	    adev->hdp.ras_funcs->ras_fini)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 8458cebc6d5b83..0001631cfedb77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -29,6 +29,7 @@
 #include <linux/types.h>
 
 #include "amdgpu_irq.h"
+#include "amdgpu_ras.h"
 
 /* VA hole for 48bit addresses on Vega10 */
 #define AMDGPU_GMC_HOLE_START	0x0000800000000000ULL
@@ -135,12 +136,8 @@ struct amdgpu_gmc_funcs {
 	unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
 };
 
-struct amdgpu_xgmi_ras_funcs {
-	int (*ras_late_init)(struct amdgpu_device *adev);
-	void (*ras_fini)(struct amdgpu_device *adev);
-	int (*query_ras_error_count)(struct amdgpu_device *adev,
-				     void *ras_error_status);
-	void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_xgmi_ras {
+	struct amdgpu_ras_block_object ras_block;
 };
 
 struct amdgpu_xgmi {
@@ -159,7 +156,7 @@ struct amdgpu_xgmi {
 	struct ras_common_if *ras_if;
 	bool connected_to_cpu;
 	bool pending_reset;
-	const struct amdgpu_xgmi_ras_funcs *ras_funcs;
+	struct amdgpu_xgmi_ras *ras;
 };
 
 struct amdgpu_gmc {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a5812c21177ede..28997b7f7c9516 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1012,9 +1012,13 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 			adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-		if (adev->gmc.xgmi.ras_funcs &&
-			adev->gmc.xgmi.ras_funcs->query_ras_error_count)
-			adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
+		if (!block_obj || !block_obj->hw_ops) {
+			dev_info(adev->dev, "%s doesn't config ras function\n",
+				 get_ras_block_str(&info->head));
+			return -EINVAL;
+		}
+		if (block_obj->hw_ops->query_ras_error_count)
+			block_obj->hw_ops->query_ras_error_count(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__HDP:
 		if (adev->hdp.ras_funcs &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index e8b8f28c2f723b..d29acd33eb1104 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -732,7 +732,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 	return psp_xgmi_terminate(&adev->psp);
 }
 
-static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, void *ras_info)
 {
 	int r;
 	struct ras_ih_if ih_info = {
@@ -746,7 +746,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
 	    adev->gmc.xgmi.num_physical_nodes == 0)
 		return 0;
 
-	adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+	adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
 
 	if (!adev->gmc.xgmi.ras_if) {
 		adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
@@ -865,7 +865,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 					     void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -874,7 +874,7 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 	uint32_t ue_cnt = 0, ce_cnt = 0;
 
 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
-		return -EINVAL;
+		return;
 
 	err_data->ue_count = 0;
 	err_data->ce_count = 0;
@@ -940,17 +940,23 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 		break;
 	}
 
-	adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+	adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
 
 	err_data->ue_count += ue_cnt;
 	err_data->ce_count += ce_cnt;
-
-	return 0;
 }
 
-const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = {
-	.ras_late_init = amdgpu_xgmi_ras_late_init,
-	.ras_fini = amdgpu_xgmi_ras_fini,
+struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = {
 	.query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
 	.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
 };
+
+struct amdgpu_xgmi_ras xgmi_ras = {
+	.ras_block = {
+		.name = "xgmi",
+		.block = AMDGPU_RAS_BLOCK__XGMI_WAFL,
+		.hw_ops = &xgmi_ras_hw_ops,
+		.ras_late_init = amdgpu_xgmi_ras_late_init,
+		.ras_fini = amdgpu_xgmi_ras_fini,
+	},
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index d2189bf7d428ef..0afca51c3c0c93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -24,7 +24,7 @@
 
 #include <drm/task_barrier.h>
 #include "amdgpu_psp.h"
-
+#include "amdgpu_ras.h"
 
 struct amdgpu_hive_info {
 	struct kobject kobj;
@@ -50,7 +50,7 @@ struct amdgpu_pcs_ras_field {
 	uint32_t pcs_err_shift;
 };
 
-extern const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs;
+extern struct amdgpu_xgmi_ras xgmi_ras;
 struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
 void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
 int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);