Skip to content

Commit

Permalink
Improve the way of handling BUFFER_PG during PFC storm (#1480)
Browse files Browse the repository at this point in the history
* Fix the issue: failed to updating BUFFER_PG during PFC storm

One action for the PFC storm is to set a zero buffer profile on the PG under PFC storm.
The zero buffer won't be removed until the PFC storm has gone.
If the user wants to modify the buffer profile for the PG, the bufferorch will return "task_need_retry".
General speaking it doesn't matter unless that:
- the system can't be warm rebooted until the PFC storm has gone.
- the "task_need_retry" will block the update of the entire BUFFER_PG table from being programmed to ASIC.
In this sense, we need a better solution.

The new solution will:
- record the new profile user wants to apply during PFC storm as the "pending profile" for that PG and return "task_success" if the PG is under PFC storm.
- apply the pending profile once the PG is unlocked.
- the latest pending profile will take effect in case user tries updating the profile for more than 1 times.

Signed-off-by: Stephen Sun <stephens@nvidia.com>

* Address review comments: add a pair of brackets for the if-block

Signed-off-by: Stephen Sun <stephens@nvidia.com>

* Fix ut error

Signed-off-by: Stephen Sun <stephens@nvidia.com>

* Fix typo

Signed-off-by: Stephen Sun <stephens@nvidia.com>
  • Loading branch information
stephenxs committed Dec 2, 2020
1 parent 095f481 commit 053aa15
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 11 deletions.
25 changes: 17 additions & 8 deletions orchagent/bufferorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,7 @@ task_process_status BufferOrch::processPriorityGroup(Consumer &consumer)
for (string port_name : port_names)
{
Port port;
bool portUpdated = false;
SWSS_LOG_DEBUG("processing port:%s", port_name.c_str());
if (!gPortsOrch->getPort(port_name, port))
{
Expand All @@ -771,18 +772,26 @@ task_process_status BufferOrch::processPriorityGroup(Consumer &consumer)
}
if (port.m_priority_group_lock[ind])
{
SWSS_LOG_WARN("Priority group %zd on port %s is locked, will retry", ind, port_name.c_str());
return task_process_status::task_need_retry;
SWSS_LOG_WARN("Priority group %zd on port %s is locked, pending profile 0x%" PRIx64 " until unlocked", ind, port_name.c_str(), sai_buffer_profile);
portUpdated = true;
port.m_priority_group_pending_profile[ind] = sai_buffer_profile;
}
pg_id = port.m_priority_group_ids[ind];
SWSS_LOG_DEBUG("Applying buffer profile:0x%" PRIx64 " to port:%s pg index:%zd, pg sai_id:0x%" PRIx64, sai_buffer_profile, port_name.c_str(), ind, pg_id);
sai_status_t sai_status = sai_buffer_api->set_ingress_priority_group_attribute(pg_id, &attr);
if (sai_status != SAI_STATUS_SUCCESS)
else
{
SWSS_LOG_ERROR("Failed to set port:%s pg:%zd buffer profile attribute, status:%d", port_name.c_str(), ind, sai_status);
return task_process_status::task_failed;
pg_id = port.m_priority_group_ids[ind];
SWSS_LOG_DEBUG("Applying buffer profile:0x%" PRIx64 " to port:%s pg index:%zd, pg sai_id:0x%" PRIx64, sai_buffer_profile, port_name.c_str(), ind, pg_id);
sai_status_t sai_status = sai_buffer_api->set_ingress_priority_group_attribute(pg_id, &attr);
if (sai_status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("Failed to set port:%s pg:%zd buffer profile attribute, status:%d", port_name.c_str(), ind, sai_status);
return task_process_status::task_failed;
}
}
}
if (portUpdated)
{
gPortsOrch->setPort(port_name, port);
}
}

if (m_ready_list.find(key) != m_ready_list.end())
Expand Down
19 changes: 17 additions & 2 deletions orchagent/pfcactionhandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,10 +523,25 @@ PfcWdZeroBufferHandler::~PfcWdZeroBufferHandler(void)
return;
}

sai_object_id_t pg = portInstance.m_priority_group_ids[size_t(getQueueId())];
auto idx = size_t(getQueueId());
sai_object_id_t pg = portInstance.m_priority_group_ids[idx];
sai_object_id_t pending_profile_id = portInstance.m_priority_group_pending_profile[idx];

attr.id = SAI_INGRESS_PRIORITY_GROUP_ATTR_BUFFER_PROFILE;
attr.value.oid = m_originalPgBufferProfile;

if (pending_profile_id != SAI_NULL_OBJECT_ID)
{
attr.value.oid = pending_profile_id;
SWSS_LOG_NOTICE("Priority group %zd on port %s has been restored to pending profile 0x%" PRIx64,
idx, portInstance.m_alias.c_str(), pending_profile_id);
portInstance.m_priority_group_pending_profile[idx] = SAI_NULL_OBJECT_ID;
}
else
{
attr.value.oid = m_originalPgBufferProfile;
SWSS_LOG_NOTICE("Priority group %zd on port %s has been restored to original profile 0x%" PRIx64,
idx, portInstance.m_alias.c_str(), m_originalPgBufferProfile);
}

// Set our zero buffer profile
status = sai_buffer_api->set_ingress_priority_group_attribute(pg, &attr);
Expand Down
1 change: 1 addition & 0 deletions orchagent/port.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ class Port
*/
std::vector<bool> m_queue_lock;
std::vector<bool> m_priority_group_lock;
std::vector<sai_object_id_t> m_priority_group_pending_profile;

std::unordered_set<sai_object_id_t> m_ingress_acl_tables_uset;
std::unordered_set<sai_object_id_t> m_egress_acl_tables_uset;
Expand Down
1 change: 1 addition & 0 deletions orchagent/portsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3192,6 +3192,7 @@ void PortsOrch::initializePriorityGroups(Port &port)

port.m_priority_group_ids.resize(attr.value.u32);
port.m_priority_group_lock.resize(attr.value.u32);
port.m_priority_group_pending_profile.resize(attr.value.u32);

if (attr.value.u32 == 0)
{
Expand Down
16 changes: 15 additions & 1 deletion tests/mock_tests/portsorch_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,14 +416,28 @@ namespace portsorch_test
// process pool, profile and PGs
static_cast<Orch *>(gBufferOrch)->doTask();

// Port should have been updated by BufferOrch->doTask
gPortsOrch->getPort("Ethernet0", port);
auto profile_id = (*BufferOrch::m_buffer_type_maps["BUFFER_PROFILE"])[string("test_profile")].m_saiObjectId;
ASSERT_TRUE(profile_id != SAI_NULL_OBJECT_ID);
ASSERT_TRUE(port.m_priority_group_pending_profile[3] == profile_id);
ASSERT_TRUE(port.m_priority_group_pending_profile[4] == SAI_NULL_OBJECT_ID);

auto pgConsumer = static_cast<Consumer*>(gBufferOrch->getExecutor(CFG_BUFFER_PG_TABLE_NAME));
pgConsumer->dumpPendingTasks(ts);
ASSERT_FALSE(ts.empty()); // PG is skipped
ASSERT_TRUE(ts.empty()); // PG is stored in m_priority_group_pending_profile
ts.clear();

// release zero buffer drop handler
dropHandler.reset();

// re-fetch the port
gPortsOrch->getPort("Ethernet0", port);

// pending profile should be cleared
ASSERT_TRUE(port.m_priority_group_pending_profile[3] == SAI_NULL_OBJECT_ID);
ASSERT_TRUE(port.m_priority_group_pending_profile[4] == SAI_NULL_OBJECT_ID);

// process PGs
static_cast<Orch *>(gBufferOrch)->doTask();

Expand Down

0 comments on commit 053aa15

Please sign in to comment.