diff --git a/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/service/AgentStateClientImpl.java b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/service/AgentStateClientImpl.java index 90279b458f..86c9c16091 100644 --- a/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/service/AgentStateClientImpl.java +++ b/src/backend/commons/gse-sdk/src/main/java/com/tencent/bk/job/common/gse/service/AgentStateClientImpl.java @@ -24,8 +24,6 @@ package com.tencent.bk.job.common.gse.service; -import com.tencent.bk.job.common.constant.ErrorCode; -import com.tencent.bk.job.common.exception.InternalException; import com.tencent.bk.job.common.gse.GseClient; import com.tencent.bk.job.common.gse.config.AgentStateQueryConfig; import com.tencent.bk.job.common.gse.constants.AgentAliveStatusEnum; @@ -73,14 +71,16 @@ public AgentState getAgentState(String agentId) { "cannot find agent state by agentId:{}", agentId ); - throw new InternalException(ErrorCode.GSE_API_DATA_ERROR, new String[]{msg.getMessage()}); + log.warn(msg.getMessage()); + return null; } else if (agentStateList.size() > 1) { FormattingTuple msg = MessageFormatter.format( - "multi({}) agent states by agentId:{}", + "multi({}) agent states by agentId:{}, use the first one", agentStateList.size(), agentId ); - throw new InternalException(ErrorCode.GSE_API_DATA_ERROR, new String[]{msg.getMessage()}); + log.warn(msg.getMessage()); + return agentStateList.get(0); } return agentStateList.get(0); } diff --git a/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/service/impl/sync/EventsHandler.java b/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/service/impl/sync/EventsHandler.java index e5464313d2..ae57823b34 100644 --- a/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/service/impl/sync/EventsHandler.java +++ b/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/service/impl/sync/EventsHandler.java @@ -76,10 +76,10 @@ void handleEventWithTrace(ResourceEvent event) { Span span = buildSpan(); try (Tracer.SpanInScope ignored = this.tracer.withSpan(span.start())) { handleEvent(event); - } catch (Exception e) { - span.error(e); + } catch (Throwable t) { + span.error(t); eventHandleResult = MetricsConstants.TAG_VALUE_CMDB_EVENT_HANDLE_RESULT_FAILED; - throw e; + log.warn("Fail to handleOneEvent:" + event, t); } finally { span.end(); long timeConsuming = System.currentTimeMillis() - event.getCreateTime(); @@ -103,14 +103,14 @@ private Tags buildEventHandleTimeTags(String eventHandleResult) { @Override public void run() { while (enabled) { - ResourceEvent event = null; + ResourceEvent event; try { event = queue.take(); handleEventWithTrace(event); } catch (InterruptedException e) { log.warn("queue.take interrupted", e); } catch (Throwable t) { - log.warn("Fail to handleOneEvent:" + event, t); + log.error("Fail to handleEventWithTrace", t); } } } diff --git a/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/service/impl/sync/HostEventHandler.java b/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/service/impl/sync/HostEventHandler.java index cfa97ea223..392a172c0b 100644 --- a/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/service/impl/sync/HostEventHandler.java +++ b/src/backend/job-manage/service-job-manage/src/main/java/com/tencent/bk/job/manage/service/impl/sync/HostEventHandler.java @@ -37,6 +37,8 @@ import io.micrometer.core.instrument.Tags; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; +import org.slf4j.helpers.FormattingTuple; +import org.slf4j.helpers.MessageFormatter; import org.springframework.cloud.sleuth.Tracer; import java.util.concurrent.BlockingQueue; @@ -100,8 +102,8 @@ private void handleOneEventIndeed(ResourceEvent event) { log.warn("Ignore hostEvent without hostId:{}", event); break; } - // 找出Agent有效的IP,并设置Agent状态 - updateIpAndAgentStatus(hostInfoDTO); + // 尝试设置Agent状态 + tryToUpdateAgentStatus(hostInfoDTO); // 更新DB与缓存中的主机数据 hostService.createOrUpdateHostBeforeLastTime(hostInfoDTO); break; @@ -113,12 +115,20 @@ private void handleOneEventIndeed(ResourceEvent event) { } } - private void updateIpAndAgentStatus(ApplicationHostDTO hostInfoDTO) { - String agentId = StringUtils.isNotBlank(hostInfoDTO.getAgentId()) ? - hostInfoDTO.getAgentId() : hostInfoDTO.getCloudIp(); - AgentState agentState = agentStateClient.getAgentState(agentId); - if (agentState != null) { - hostInfoDTO.setGseAgentStatus(agentState.getStatusCode()); + private void tryToUpdateAgentStatus(ApplicationHostDTO hostInfoDTO) { + try { + String agentId = StringUtils.isNotBlank(hostInfoDTO.getAgentId()) ? + hostInfoDTO.getAgentId() : hostInfoDTO.getCloudIp(); + AgentState agentState = agentStateClient.getAgentState(agentId); + if (agentState != null) { + hostInfoDTO.setGseAgentStatus(agentState.getStatusCode()); + } + } catch (Exception e) { + FormattingTuple msg = MessageFormatter.format( + "Fail to UpdateAgentStatus, host={}", + hostInfoDTO + ); + log.warn(msg.getMessage(), e); } }