diff --git a/meta/sai_meta.cpp b/meta/sai_meta.cpp index 731db361c95..aaf281ba03b 100644 --- a/meta/sai_meta.cpp +++ b/meta/sai_meta.cpp @@ -6015,6 +6015,45 @@ void meta_sai_on_fdb_flush_event_consolidated( } } +void meta_fdb_event_snoop_oid( + _In_ sai_object_id_t oid) +{ + SWSS_LOG_ENTER(); + + if (oid == SAI_NULL_OBJECT_ID) + return; + + if (object_reference_exists(oid)) + return; + + sai_object_type_t ot = sai_object_type_query(oid); + + if (ot == SAI_OBJECT_TYPE_NULL) + { + SWSS_LOG_ERROR("failed to get object type on fdb_event oid: 0x%lx", oid); + return; + } + + sai_object_meta_key_t key = { .objecttype = ot, .objectkey = { .key = { .object_id = oid } } }; + + object_reference_insert(oid); + + if (!object_exists(key)) + create_object(key); + + /* + * In normal operation orch agent should query or create all bridge, vlan + * and bridge port, so we should not get this message. Let's put it as + * warning for better visibility. Most likely if this happen there is a + * vendor bug in SAI and we should also see warnings or errors reported + * from syncd in logs. + */ + + SWSS_LOG_WARN("fdb_entry oid (snoop): %s: %s", + sai_serialize_object_type(ot).c_str(), + sai_serialize_object_id(oid).c_str()); +} + void meta_sai_on_fdb_event_single( _In_ const sai_fdb_event_notification_data_t& data) { @@ -6024,6 +6063,35 @@ void meta_sai_on_fdb_event_single( std::string key_fdb = sai_serialize_object_meta_key(meta_key_fdb); + /* + * Because we could receive fdb event's before orch agent will query or + * create bridge/vlan/bridge port we should snoop here new OIDs and put + * them in local DB. + * + * Unfortunately we don't have a way to check whether those OIDs are correct + * or whether there maybe some bug in vendor SAI and for example is sending + * invalid OIDs in those event's. Also sai_object_type_query can return + * valid object type for OID, but this does not guarantee that this OID is + * valid, for example one of existing bridge ports that orch agent didn't + * query yet. + */ + + meta_fdb_event_snoop_oid(data.fdb_entry.bv_id); + + for (uint32_t i = 0; i < data.attr_count; i++) + { + auto meta = sai_metadata_get_attr_metadata(SAI_OBJECT_TYPE_FDB_ENTRY, data.attr[i].id); + + if (meta == NULL) + { + SWSS_LOG_ERROR("failed to get metadata for fdb_entry attr.id = %d", data.attr[i].id); + continue; + } + + if (meta->attrvaluetype == SAI_ATTR_VALUE_TYPE_OBJECT_ID) + meta_fdb_event_snoop_oid(data.attr[i].value.oid); + } + switch (data.event_type) { case SAI_FDB_EVENT_LEARNED: @@ -6096,6 +6164,33 @@ void meta_sai_on_fdb_event_single( break; + case SAI_FDB_EVENT_MOVE: + + if (!object_exists(key_fdb)) + { + SWSS_LOG_WARN("object key %s doesn't exist but received FDB MOVE event", key_fdb.c_str()); + break; + } + + // on MOVE event, just update attributes on existing entry + + for (uint32_t i = 0; i < data.attr_count; i++) + { + const sai_attribute_t& attr = data.attr[i]; + + sai_status_t status = meta_generic_validation_set(meta_key_fdb, &attr); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("object key %s FDB MOVE event, SET validateion failed on attr.id = %d", key_fdb.c_str(), attr.id); + continue; + } + + meta_generic_validation_post_set(meta_key_fdb, &attr); + } + + break; + default: SWSS_LOG_ERROR("got FDB_ENTRY notification with unknown event_type %d, bug?", data.event_type); diff --git a/saiplayer/saiplayer.cpp b/saiplayer/saiplayer.cpp index c70dbda5a05..e6a28176873 100644 --- a/saiplayer/saiplayer.cpp +++ b/saiplayer/saiplayer.cpp @@ -1079,6 +1079,8 @@ void processBulk( } } +bool g_sleep = false; + int replay(int argc, char **argv) { //swss::Logger::getInstance().setMinPrio(swss::Logger::SWSS_DEBUG); @@ -1246,10 +1248,37 @@ int replay(int argc, char **argv) catch (const std::exception &e) { SWSS_LOG_NOTICE("line: %s", line.c_str()); - SWSS_LOG_NOTICE("resp: %s", response.c_str()); + SWSS_LOG_NOTICE("resp (expected): %s", response.c_str()); + SWSS_LOG_NOTICE("got: %s", sai_serialize_status(status).c_str()); + + if (api == SAI_COMMON_API_GET && (status == SAI_STATUS_SUCCESS || status == SAI_STATUS_BUFFER_OVERFLOW)) + { + // log each get parameter + for (uint32_t i = 0; i < attr_count; ++i) + { + auto meta = sai_metadata_get_attr_metadata(object_type, attr_list[i].id); + + auto val = sai_serialize_attr_value(*meta, attr_list[i]); + + SWSS_LOG_NOTICE(" - %s:%s", meta->attridname, val.c_str()); + } + } exit(EXIT_FAILURE); } + + if (api == SAI_COMMON_API_GET && (status == SAI_STATUS_SUCCESS || status == SAI_STATUS_BUFFER_OVERFLOW)) + { + // log each get parameter + for (uint32_t i = 0; i < attr_count; ++i) + { + auto meta = sai_metadata_get_attr_metadata(object_type, attr_list[i].id); + + auto val = sai_serialize_attr_value(*meta, attr_list[i]); + + SWSS_LOG_NOTICE(" - %s:%s", meta->attridname, val.c_str()); + } + } } } @@ -1257,6 +1286,13 @@ int replay(int argc, char **argv) SWSS_LOG_NOTICE("finished replaying %s with SUCCESS", filename); + if (g_sleep) + { + fprintf(stderr, "Reply SUCCESS, sleeping, watching for notifications\n"); + + sleep(-1); + } + return 0; } @@ -1273,6 +1309,8 @@ void printUsage() std::cout << " Enable temporary view between init and apply" << std::endl << std::endl; std::cout << " -i --inspectAsic:" << std::endl; std::cout << " Inspect ASIC by ASIC DB" << std::endl << std::endl; + std::cout << " -s --sleep:" << std::endl; + std::cout << " Sleep after success reply, to notice any switch notifications" << std::endl << std::endl; std::cout << " -h --help:" << std::endl; std::cout << " Print out this message" << std::endl << std::endl; } @@ -1293,10 +1331,11 @@ int handleCmdLine(int argc, char **argv) { "skipNotifySyncd", no_argument, 0, 'C' }, { "enableDebug", no_argument, 0, 'd' }, { "inspectAsic", no_argument, 0, 'i' }, + { "sleep", no_argument, 0, 's' }, { 0, 0, 0, 0 } }; - const char* const optstring = "hCdui"; + const char* const optstring = "hCduis"; int c = getopt_long(argc, argv, optstring, long_options, 0); @@ -1321,6 +1360,10 @@ int handleCmdLine(int argc, char **argv) g_inspectAsic = true; break; + case 's': + g_sleep = true; + break; + case 'h': printUsage(); exit(EXIT_SUCCESS); diff --git a/syncd/syncd.cpp b/syncd/syncd.cpp index ca3160d7c7b..cc57fa6f429 100644 --- a/syncd/syncd.cpp +++ b/syncd/syncd.cpp @@ -448,6 +448,34 @@ sai_object_id_t translate_rid_to_vid( return vid; } +/** + * @brief Check if RID exists on the ASIC DB. + * + * @param rid Real object id to check. + * + * @return True if exists or SAI_NULL_OBJECT_ID, otherwise false. + */ +bool check_rid_exists( + _In_ sai_object_id_t rid) +{ + SWSS_LOG_ENTER(); + + if (rid == SAI_NULL_OBJECT_ID) + return true; + + if (local_rid_to_vid.find(rid) != local_rid_to_vid.end()) + return true; + + std::string str_rid = sai_serialize_object_id(rid); + + auto pvid = g_redisClient->hget(RIDTOVID, str_rid); + + if (pvid != NULL) + return true; + + return false; +} + void translate_list_rid_to_vid( _In_ sai_object_list_t &element, _In_ sai_object_id_t switch_id) diff --git a/syncd/syncd.h b/syncd/syncd.h index 6cc7fc80f36..9ce4dc4bbcd 100644 --- a/syncd/syncd.h +++ b/syncd/syncd.h @@ -110,6 +110,9 @@ sai_object_id_t translate_rid_to_vid( _In_ sai_object_id_t rid, _In_ sai_object_id_t switch_vid); +bool check_rid_exists( + _In_ sai_object_id_t rid); + void translate_rid_to_vid_list( _In_ sai_object_type_t object_type, _In_ sai_object_id_t switch_vid, diff --git a/syncd/syncd_notifications.cpp b/syncd/syncd_notifications.cpp index 2b53828a6f6..824c0e7eaa2 100644 --- a/syncd/syncd_notifications.cpp +++ b/syncd/syncd_notifications.cpp @@ -240,18 +240,73 @@ void redisPutFdbEntryToAsicView( g_redisClient->hset(key, strAttrId, strAttrValue); } +void check_fdb_event_notification_data( + _In_ const sai_fdb_event_notification_data_t& data) +{ + SWSS_LOG_ENTER(); + + /* + * Any new RID value spotted in fdb notification can happen for 2 reasons: + * + * - a bug is present on the vendor SAI, all RID's are already in local or + * REDIS ASIC DB but vendor SAI returned new or invalid RID + * + * - orch agent didn't query yet bridge ID/vlan ID and already + * started to receive fdb notifications in which case warn message + * could be ignored. + * + * If vendor SAI will return invalid RID, then this will later on lead to + * inconsistent DB state and possible failure on apply view after cold or + * warm boot. + * + * On switch init we do discover phase, and we should discover all objects + * so we should not get any of those messages if SAI is in consistent + * state. + */ + + if (!check_rid_exists(data.fdb_entry.bv_id)) + SWSS_LOG_ERROR("bv_id RID 0x%lx is not present on local ASIC DB: %s", data.fdb_entry.bv_id, + sai_serialize_fdb_entry(data.fdb_entry).c_str()); + + if (!check_rid_exists(data.fdb_entry.switch_id) || data.fdb_entry.switch_id == SAI_NULL_OBJECT_ID) + SWSS_LOG_ERROR("switch_id RID 0x%lx is not present on local ASIC DB: %s", data.fdb_entry.bv_id, + sai_serialize_fdb_entry(data.fdb_entry).c_str()); + + for (uint32_t i = 0; i < data.attr_count; i++) + { + const sai_attribute_t& attr = data.attr[i]; + + auto meta = sai_metadata_get_attr_metadata(SAI_OBJECT_TYPE_FDB_ENTRY, attr.id); + + if (meta == NULL) + { + SWSS_LOG_ERROR("unable to get metadata for fdb_entry attr.id = %d", attr.id); + continue; + } + + // skip non oid attributes + if (meta->attrvaluetype != SAI_ATTR_VALUE_TYPE_OBJECT_ID) + continue; + + if (!check_rid_exists(attr.value.oid)) + SWSS_LOG_WARN("RID 0x%lx on %s is not present on local ASIC DB", attr.value.oid, meta->attridname); + } +} + void process_on_fdb_event( _In_ uint32_t count, _In_ sai_fdb_event_notification_data_t *data) { SWSS_LOG_ENTER(); - SWSS_LOG_DEBUG("fdb event count: %d", count); + SWSS_LOG_INFO("fdb event count: %u", count); for (uint32_t i = 0; i < count; i++) { sai_fdb_event_notification_data_t *fdb = &data[i]; + check_fdb_event_notification_data(*fdb); + SWSS_LOG_DEBUG("fdb %u: type: %d", i, fdb->event_type); fdb->fdb_entry.switch_id = translate_rid_to_vid(fdb->fdb_entry.switch_id, SAI_NULL_OBJECT_ID); @@ -536,7 +591,7 @@ bool ntf_queue_t::enqueue( if (!(log_count % 1000)) { - SWSS_LOG_NOTICE("Too many messages in queue (%ld), dropped %ld FDB events!", + SWSS_LOG_NOTICE("Too many messages in queue (%zu), dropped %u FDB events!", queueStats(), (log_count+1)); } @@ -782,6 +837,6 @@ void check_notifications_pointers( * Here we translated pointer, just log it. */ - SWSS_LOG_INFO("%s: %lp (orch) => %lp (syncd)", meta->attridname, prev, attr.value.ptr); + SWSS_LOG_INFO("%s: 0x%lX (orch) => 0x%lX (syncd)", meta->attridname, (uint64_t)prev, (uint64_t)attr.value.ptr); } }