Skip to content

Commit

Permalink
Fix issue #13341 ARP entry can be out of sync between kernel and APPL…
Browse files Browse the repository at this point in the history
…_DB if multiple updates are received from RTNL (#2619)

- What I did
Fix issue sonic-net/sonic-buildimage#13341 the issue that ARP entry is out of sync between kernel and APPL_DB
In AppRestartAssist::insertToMap, in case an entry has been updated more than once with the same value but different from the stored one, keep the state as NEW.

Eg.
Assume the entry's value that is restored from the warm reboot is V0, the following events received.

The first update with value V1 is received and handled by the if (found != appTableCacheMap[tableName].end()) branch,
* the state is set to NEW
* value is updated to V1
The second update with the same value V1 is received and handled by this branch
Originally, the state was set to SAME, which is wrong because V1 is different from the stored value V0
The correct logic should be: set the state to SAME only if the state is not NEW
This is a very rare case because most of times, the entry won't be updated multiple times

- Why I did it
To fix the issue.

- How I verified it
Mock test is added to cover the case.

Signed-off-by: Stephen Sun <stephens@nvidia.com>
  • Loading branch information
stephenxs authored and StormLiangMS committed Feb 10, 2023
1 parent da0cf7a commit c41a1b7
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 6 deletions.
6 changes: 4 additions & 2 deletions tests/mock_tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ LDADD_GTEST = -L/usr/src/gtest

## Orchagent Unit Tests

tests_INCLUDES = -I $(FLEX_CTR_DIR) -I $(DEBUG_CTR_DIR) -I $(top_srcdir)/lib -I$(top_srcdir)/cfgmgr -I$(top_srcdir)/orchagent -I$(P4_ORCH_DIR)/tests
tests_INCLUDES = -I $(FLEX_CTR_DIR) -I $(DEBUG_CTR_DIR) -I $(top_srcdir)/lib -I$(top_srcdir)/cfgmgr -I$(top_srcdir)/orchagent -I$(P4_ORCH_DIR)/tests -I$(top_srcdir)/warmrestart

tests_SOURCES = aclorch_ut.cpp \
portsorch_ut.cpp \
Expand Down Expand Up @@ -49,6 +49,7 @@ tests_SOURCES = aclorch_ut.cpp \
swssnet_ut.cpp \
flowcounterrouteorch_ut.cpp \
orchdaemon_ut.cpp \
warmrestartassist_ut.cpp \
$(top_srcdir)/lib/gearboxutils.cpp \
$(top_srcdir)/lib/subintf.cpp \
$(top_srcdir)/orchagent/orchdaemon.cpp \
Expand Down Expand Up @@ -105,7 +106,8 @@ tests_SOURCES = aclorch_ut.cpp \
$(top_srcdir)/orchagent/srv6orch.cpp \
$(top_srcdir)/orchagent/nvgreorch.cpp \
$(top_srcdir)/cfgmgr/portmgr.cpp \
$(top_srcdir)/cfgmgr/buffermgrdyn.cpp
$(top_srcdir)/cfgmgr/buffermgrdyn.cpp \
$(top_srcdir)/warmrestart/warmRestartAssist.cpp

tests_SOURCES += $(FLEX_CTR_DIR)/flex_counter_manager.cpp $(FLEX_CTR_DIR)/flex_counter_stat_manager.cpp $(FLEX_CTR_DIR)/flow_counter_handler.cpp $(FLEX_CTR_DIR)/flowcounterrouteorch.cpp
tests_SOURCES += $(DEBUG_CTR_DIR)/debug_counter.cpp $(DEBUG_CTR_DIR)/drop_counter.cpp
Expand Down
64 changes: 64 additions & 0 deletions tests/mock_tests/warmrestartassist_ut.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#define protected public
#include "orch.h"
#undef protected
#include "ut_helper.h"
//#include "mock_orchagent_main.h"
#include "mock_table.h"
#include "warm_restart.h"
#define private public
#include "warmRestartAssist.h"
#undef private

#define APP_WRA_TEST_TABLE_NAME "TEST_TABLE"

namespace warmrestartassist_test
{
using namespace std;

shared_ptr<swss::DBConnector> m_app_db = make_shared<swss::DBConnector>("APPL_DB", 0);
shared_ptr<swss::RedisPipeline> m_app_db_pipeline = make_shared<swss::RedisPipeline>(m_app_db.get());
shared_ptr<swss::ProducerStateTable> m_wra_test_table = make_shared<swss::ProducerStateTable>(m_app_db.get(), APP_WRA_TEST_TABLE_NAME);

AppRestartAssist *appRestartAssist;

struct WarmrestartassistTest : public ::testing::Test
{
WarmrestartassistTest()
{
appRestartAssist = new AppRestartAssist(m_app_db_pipeline.get(), "testsyncd", "swss", 0);
appRestartAssist->m_warmStartInProgress = true;
appRestartAssist->registerAppTable(APP_WRA_TEST_TABLE_NAME, m_wra_test_table.get());
}

void SetUp() override
{
testing_db::reset();

Table testTable = Table(m_app_db.get(), APP_WRA_TEST_TABLE_NAME);
testTable.set("key",
{
{"field", "value0"},
});
}

void TearDown() override
{
}
};

TEST_F(WarmrestartassistTest, warmRestartAssistTest)
{
appRestartAssist->readTablesToMap();
vector<FieldValueTuple> fvVector;
fvVector.emplace_back("field", "value1");
appRestartAssist->insertToMap(APP_WRA_TEST_TABLE_NAME, "key", fvVector, false);
appRestartAssist->insertToMap(APP_WRA_TEST_TABLE_NAME, "key", fvVector, false);
appRestartAssist->reconcile();

fvVector.clear();
Table testTable = Table(m_app_db.get(), APP_WRA_TEST_TABLE_NAME);
ASSERT_TRUE(testTable.get("key", fvVector));
ASSERT_EQ(fvField(fvVector[0]), "field");
ASSERT_EQ(fvValue(fvVector[0]), "value1");
}
}
29 changes: 25 additions & 4 deletions warmrestart/warmRestartAssist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,31 @@ void AppRestartAssist::insertToMap(string tableName, string key, vector<FieldVal
}
else
{
SWSS_LOG_INFO("%s, found key: %s, same value", tableName.c_str(), key.c_str());

// mark as SAME flag
setCacheEntryState(found->second, SAME);
auto state = getCacheEntryState(found->second);
/*
* In case an entry has been updated for more than once with the same value but different from the stored one,
* keep the state as NEW.
* Eg.
* Assume the entry's value that is restored from last warm reboot is V0.
* 1. The first update with value V1 is received and handled by the above `if (found != appTableCacheMap[tableName].end())` branch,
* - state is set to NEW
* - value is updated to V1
* 2. The second update with the same value V1 is received and handled by this branch
* - Originally, state was set to SAME, which is wrong because V1 is different from the stored value V0
* - The correct logic should be: set the state to same only if the state is not NEW
* This is a very rare case because in most of times the entry won't be updated for multiple times
*/
if (state == NEW)
{
SWSS_LOG_NOTICE("%s, found key: %s, it has been updated for the second time, keep state as NEW",
tableName.c_str(), key.c_str());
}
else
{
SWSS_LOG_INFO("%s, found key: %s, same value", tableName.c_str(), key.c_str());
// mark as SAME flag
setCacheEntryState(found->second, SAME);
}
}
}
else
Expand Down

0 comments on commit c41a1b7

Please sign in to comment.