From c270cea5b76d49268796df46f2ac12bb36c018e3 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 13 Dec 2022 20:10:02 +0800 Subject: [PATCH 1/6] fix(rpc): if all vnode in one group can not connect, return TSDB_CODE_RPC_VGROUP_NOT_REACHED --- src/inc/taoserror.h | 1 + src/rpc/src/rpcMain.c | 4 ++++ src/util/src/terror.c | 1 + 3 files changed, 6 insertions(+) diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index 40f1c3096ea..e7e0ec634e1 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -61,6 +61,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_FQDN_ERROR TAOS_DEF_ERROR_CODE(0, 0x0015) //"Unable to resolve FQDN" #define TSDB_CODE_RPC_INVALID_VERSION TAOS_DEF_ERROR_CODE(0, 0x0016) //"Invalid app version" #define TSDB_CODE_RPC_SHORTCUT TAOS_DEF_ERROR_CODE(0, 0x0017) //"Shortcut" +#define TSDB_CODE_RPC_VGROUP_NOT_REACHED TAOS_DEF_ERROR_CODE(0, 0x0018) //"Vgroup could not be reached" //common & util #define TSDB_CODE_COM_OPS_NOT_SUPPORT TAOS_DEF_ERROR_CODE(0, 0x0100) //"Operation not supported" diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index 5c385410080..3b1e30eac50 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -1515,6 +1515,10 @@ static void rpcProcessConnError(void *param, void *id) { rpcMsg.pCont = NULL; rpcMsg.contLen = 0; + if( pContext->numOfTry >= pContext->epSet.numOfEps && rpcMsg.code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + rpcMsg.code = TSDB_CODE_RPC_VGROUP_NOT_REACHED; + } + tWarn("%s %p, connection error. notify client query over. numOfTry=%d msgType=%d", pRpc->label, pContext->ahandle, pContext->numOfTry, pContext->msgType); rpcNotifyClient(pContext, &rpcMsg); } else { diff --git a/src/util/src/terror.c b/src/util/src/terror.c index b0420166a3c..ad421e29b7a 100644 --- a/src/util/src/terror.c +++ b/src/util/src/terror.c @@ -126,6 +126,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_PRECISION_TYPE, "Invalid timestamp pre TAOS_DEFINE_ERROR(TSDB_CODE_TSC_RES_TOO_MANY, "Result set too large to be output") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_TOO_MANY_SML_LINES, "Too many lines in batch") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_SEND_DATA_FAILED, "Client send request data failed") +TAOS_DEFINE_ERROR(TSDB_CODE_TSC_VGROUP_NOT_REACHED, "Vgroup could not be reached") // mnode TAOS_DEFINE_ERROR(TSDB_CODE_MND_MSG_NOT_PROCESSED, "Message not processed") From a3f33f68e378ad6b45a6e811257730a975442577 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 13 Dec 2022 20:25:55 +0800 Subject: [PATCH 2/6] fix(rpc): add error code --- src/util/src/terror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/src/terror.c b/src/util/src/terror.c index ad421e29b7a..9e8c58662df 100644 --- a/src/util/src/terror.c +++ b/src/util/src/terror.c @@ -69,6 +69,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_APP_NOT_READY, "Database not ready") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_FQDN_ERROR, "Unable to resolve FQDN") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_INVALID_VERSION, "Invalid app version") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_SHORTCUT, "Shortcut") +TAOS_DEFINE_ERROR(TSDB_CODE_RPC_VGROUP_NOT_REACHED, "Vgroup could not be reached") //common & util TAOS_DEFINE_ERROR(TSDB_CODE_COM_OPS_NOT_SUPPORT, "Operation not supported") @@ -126,7 +127,6 @@ TAOS_DEFINE_ERROR(TSDB_CODE_TSC_INVALID_PRECISION_TYPE, "Invalid timestamp pre TAOS_DEFINE_ERROR(TSDB_CODE_TSC_RES_TOO_MANY, "Result set too large to be output") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_TOO_MANY_SML_LINES, "Too many lines in batch") TAOS_DEFINE_ERROR(TSDB_CODE_TSC_SEND_DATA_FAILED, "Client send request data failed") -TAOS_DEFINE_ERROR(TSDB_CODE_TSC_VGROUP_NOT_REACHED, "Vgroup could not be reached") // mnode TAOS_DEFINE_ERROR(TSDB_CODE_MND_MSG_NOT_PROCESSED, "Message not processed") From 69d54dedd48050ade964b04b15ad6c6c363a18d9 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 13 Dec 2022 20:30:47 +0800 Subject: [PATCH 3/6] fix(rpc): add error code --- src/util/src/terror.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/src/terror.c b/src/util/src/terror.c index 9e8c58662df..7a2b562191a 100644 --- a/src/util/src/terror.c +++ b/src/util/src/terror.c @@ -69,7 +69,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_APP_NOT_READY, "Database not ready") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_FQDN_ERROR, "Unable to resolve FQDN") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_INVALID_VERSION, "Invalid app version") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_SHORTCUT, "Shortcut") -TAOS_DEFINE_ERROR(TSDB_CODE_RPC_VGROUP_NOT_REACHED, "Vgroup could not be reached") +TAOS_DEFINE_ERROR(TSDB_CODE_RPC_VGROUP_NOT_REACHED, "Vgroup could not be reached") //common & util TAOS_DEFINE_ERROR(TSDB_CODE_COM_OPS_NOT_SUPPORT, "Operation not supported") From 4801003d9a8e524977620ac40beb32c591de7ae5 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Tue, 13 Dec 2022 20:38:58 +0800 Subject: [PATCH 4/6] fix(rpc): only submit and query return --- src/rpc/src/rpcMain.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index 3b1e30eac50..d3396e374e6 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -1516,7 +1516,9 @@ static void rpcProcessConnError(void *param, void *id) { rpcMsg.contLen = 0; if( pContext->numOfTry >= pContext->epSet.numOfEps && rpcMsg.code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { - rpcMsg.code = TSDB_CODE_RPC_VGROUP_NOT_REACHED; + if(pContext->msgType == TSDB_MSG_TYPE_SUBMIT || pContext->msgType == TSDB_MSG_TYPE_QUERY) { + rpcMsg.code = TSDB_CODE_RPC_VGROUP_NOT_REACHED; + } } tWarn("%s %p, connection error. notify client query over. numOfTry=%d msgType=%d", pRpc->label, pContext->ahandle, pContext->numOfTry, pContext->msgType); From fdbc52bca4d53c1e7df8bf575e7ea1a9675d34f9 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Wed, 14 Dec 2022 10:38:23 +0800 Subject: [PATCH 5/6] fix(rpc): check TSDB_CODE_RPC_VGROUP_NOT_CONNECTED like TSDB_CODE_RPC_NETWORK_UNAVAIL condition --- src/client/src/tscLocal.c | 4 ++-- src/client/src/tscParseLineProtocol.c | 3 ++- src/client/src/tscServer.c | 1 + src/client/src/tscSubquery.c | 1 + src/inc/taoserror.h | 2 +- src/rpc/src/rpcMain.c | 2 +- src/util/src/terror.c | 2 +- src/vnode/src/vnodeRead.c | 5 +++-- 8 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/client/src/tscLocal.c b/src/client/src/tscLocal.c index d501927a344..898ca253aac 100644 --- a/src/client/src/tscLocal.c +++ b/src/client/src/tscLocal.c @@ -910,7 +910,7 @@ static int32_t tscProcessServStatus(SSqlObj *pSql) { pSql->res.code = pHb->res.code; } - if (pSql->res.code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + if (pSql->res.code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pSql->res.code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { taosReleaseRef(tscObjRef, pObj->hbrid); return pSql->res.code; } @@ -920,7 +920,7 @@ static int32_t tscProcessServStatus(SSqlObj *pSql) { taosReleaseRef(tscObjRef, pObj->hbrid); } - if (pSql->res.code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + if (pSql->res.code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pSql->res.code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { return pSql->res.code; } diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 68610d8486c..542dc5c6527 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -974,6 +974,7 @@ static void insertCallback(void *param, TAOS_RES *res, int32_t notUsedCode) { || code == TSDB_CODE_VND_INVALID_VGROUP_ID || code == TSDB_CODE_TDB_TABLE_RECONFIGURE || code == TSDB_CODE_APP_NOT_READY + || code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || code == TSDB_CODE_RPC_NETWORK_UNAVAIL) && batch->tryTimes < TSDB_MAX_REPLICA) { batch->tryAgain = true; } @@ -985,7 +986,7 @@ static void insertCallback(void *param, TAOS_RES *res, int32_t notUsedCode) { } } - if (code == TSDB_CODE_APP_NOT_READY || code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + if (code == TSDB_CODE_APP_NOT_READY || code == TSDB_CODE_RPC_NETWORK_UNAVAIL || code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { if (batch->tryAgain) { batch->sleep = true; } diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 203538fbf41..db68f77a18b 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -509,6 +509,7 @@ bool shouldRewTableMeta(SSqlObj* pSql, SRpcMsg* rpcMsg) { rpcMsg->code != TSDB_CODE_VND_INVALID_VGROUP_ID && rpcMsg->code != TSDB_CODE_QRY_INVALID_SCHEMA_VERSION && rpcMsg->code != TSDB_CODE_RPC_NETWORK_UNAVAIL && + rpcMsg->code != TSDB_CODE_RPC_VGROUP_NOT_CONNECTED && rpcMsg->code != TSDB_CODE_APP_NOT_READY ) { return false; } diff --git a/src/client/src/tscSubquery.c b/src/client/src/tscSubquery.c index 33883a1ab0c..3392424b459 100644 --- a/src/client/src/tscSubquery.c +++ b/src/client/src/tscSubquery.c @@ -3462,6 +3462,7 @@ static bool needRetryInsert(SSqlObj* pParentObj) { if (code != TSDB_CODE_TDB_TABLE_RECONFIGURE && code != TSDB_CODE_TDB_INVALID_TABLE_ID && code != TSDB_CODE_VND_INVALID_VGROUP_ID && code != TSDB_CODE_RPC_NETWORK_UNAVAIL && + code != TSDB_CODE_RPC_VGROUP_NOT_CONNECTED && code != TSDB_CODE_APP_NOT_READY) { pParentObj->res.code = code; ret = false; diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index e7e0ec634e1..f8080ca6e5d 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -61,7 +61,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_RPC_FQDN_ERROR TAOS_DEF_ERROR_CODE(0, 0x0015) //"Unable to resolve FQDN" #define TSDB_CODE_RPC_INVALID_VERSION TAOS_DEF_ERROR_CODE(0, 0x0016) //"Invalid app version" #define TSDB_CODE_RPC_SHORTCUT TAOS_DEF_ERROR_CODE(0, 0x0017) //"Shortcut" -#define TSDB_CODE_RPC_VGROUP_NOT_REACHED TAOS_DEF_ERROR_CODE(0, 0x0018) //"Vgroup could not be reached" +#define TSDB_CODE_RPC_VGROUP_NOT_CONNECTED TAOS_DEF_ERROR_CODE(0, 0x0018) //"Vgroup could not be connected" //common & util #define TSDB_CODE_COM_OPS_NOT_SUPPORT TAOS_DEF_ERROR_CODE(0, 0x0100) //"Operation not supported" diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index d3396e374e6..6e81f5dcbb2 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -1517,7 +1517,7 @@ static void rpcProcessConnError(void *param, void *id) { if( pContext->numOfTry >= pContext->epSet.numOfEps && rpcMsg.code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { if(pContext->msgType == TSDB_MSG_TYPE_SUBMIT || pContext->msgType == TSDB_MSG_TYPE_QUERY) { - rpcMsg.code = TSDB_CODE_RPC_VGROUP_NOT_REACHED; + rpcMsg.code = TSDB_CODE_RPC_VGROUP_NOT_CONNECTED; } } diff --git a/src/util/src/terror.c b/src/util/src/terror.c index 7a2b562191a..4852fe653f3 100644 --- a/src/util/src/terror.c +++ b/src/util/src/terror.c @@ -69,7 +69,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_APP_NOT_READY, "Database not ready") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_FQDN_ERROR, "Unable to resolve FQDN") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_INVALID_VERSION, "Invalid app version") TAOS_DEFINE_ERROR(TSDB_CODE_RPC_SHORTCUT, "Shortcut") -TAOS_DEFINE_ERROR(TSDB_CODE_RPC_VGROUP_NOT_REACHED, "Vgroup could not be reached") +TAOS_DEFINE_ERROR(TSDB_CODE_RPC_VGROUP_NOT_CONNECTED, "Vgroup could not be connected") //common & util TAOS_DEFINE_ERROR(TSDB_CODE_COM_OPS_NOT_SUPPORT, "Operation not supported") diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index e8495cac6d7..8255c925428 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -145,7 +145,8 @@ int32_t vnodeWriteToRQueue(void *vparam, void *pCont, int32_t contLen, int8_t qt atomic_add_fetch_32(&pVnode->queuedRMsg, 1); - if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRead->msgType == TSDB_MSG_TYPE_FETCH) { + if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRead->msgType == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || + pRead->msgType == TSDB_MSG_TYPE_FETCH) { vTrace("vgId:%d, write into vfetch queue, refCount:%d queued:%d", pVnode->vgId, pVnode->refCount, pVnode->queuedRMsg); return taosWriteQitem(pVnode->fqueue, qtype, pRead); @@ -229,7 +230,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SVReadMsg *pRead) { memset(pRet, 0, sizeof(SRspRet)); // qHandle needs to be freed correctly - if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL) { + if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRead->code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED) { vError("error rpc msg in query, %s", tstrerror(pRead->code)); } From 7e2b83502187204d934838ca37b2cd898edca5b5 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Wed, 14 Dec 2022 14:19:57 +0800 Subject: [PATCH 6/6] fix(query): VNodeRead.c modify pRead->code --- src/vnode/src/vnodeRead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index 8255c925428..a492ca7beaf 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -145,7 +145,7 @@ int32_t vnodeWriteToRQueue(void *vparam, void *pCont, int32_t contLen, int8_t qt atomic_add_fetch_32(&pVnode->queuedRMsg, 1); - if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRead->msgType == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || + if (pRead->code == TSDB_CODE_RPC_NETWORK_UNAVAIL || pRead->code == TSDB_CODE_RPC_VGROUP_NOT_CONNECTED || pRead->msgType == TSDB_MSG_TYPE_FETCH) { vTrace("vgId:%d, write into vfetch queue, refCount:%d queued:%d", pVnode->vgId, pVnode->refCount, pVnode->queuedRMsg);