diff --git a/pkg/client/client.go b/pkg/client/client.go
index 39d26bb..1d07cd8 100644
--- a/pkg/client/client.go
+++ b/pkg/client/client.go
@@ -1,9 +1,12 @@
 package client
 
 import (
-	"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/regions"
+	"net"
 	"net/http"
 	"net/url"
+	"time"
+
+	"github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common/regions"
 
 	cbs "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/cbs/v20170312"
 	cdb "github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/cdb/v20170320"
@@ -45,7 +48,34 @@ func NewMonitorClient(cred common.CredentialIface, conf *config.TencentConfig, r
 	} else {
 		cpf.HttpProfile.Endpoint = "monitor.tencentcloudapi.com"
 	}
-	return monitor.NewClient(cred, region, cpf)
+	return newClient(cred, region, cpf)
+}
+
+// newClient builds a monitor client that uses a custom HTTP transport
+// (proxy taken from the environment, explicit dial/TLS/idle timeouts).
+// It sets clientProfile.HttpProfile.ReqTimeout to 5 on the caller's profile
+// (presumably seconds; confirm against the SDK). The named err result is
+// never assigned, so the returned error is always nil.
+func newClient(credential common.CredentialIface,
+	region string, clientProfile *profile.ClientProfile) (client *monitor.Client, err error) {
+	client = &monitor.Client{}
+	transport := &http.Transport{
+		Proxy: http.ProxyFromEnvironment,
+		DialContext: (&net.Dialer{
+			Timeout:   30 * time.Second,
+			KeepAlive: 5 * time.Second,
+		}).DialContext,
+		ForceAttemptHTTP2:     true,
+		MaxIdleConns:          0, // zero means no limit on idle connections (net/http)
+		IdleConnTimeout:       30 * time.Second,
+		TLSHandshakeTimeout:   30 * time.Second,
+		ExpectContinueTimeout: 1 * time.Second,
+	}
+	clientProfile.HttpProfile.ReqTimeout = 5
+	client.Init(region).
+		WithCredential(credential).
+		WithProfile(clientProfile).WithHttpTransport(transport)
+	return
 }
 
 func NewMongodbClient(cred common.CredentialIface, conf *config.TencentConfig) (*mongodb.Client, error) {
diff --git a/pkg/collector/product.go b/pkg/collector/product.go
index 174514e..d0e06c0 100644
--- a/pkg/collector/product.go
+++ b/pkg/collector/product.go
@@ -244,13 +244,16 @@ func (c *TcProductCollector) Collect(ch chan<- prometheus.Metric) (err error) {
 	for _, query := range c.Querys {
 		go func(q *metric.TcmQuery) {
 			defer wg.Done()
-			pms, err := q.GetPromMetrics()
-			if err != nil {
+			pms, err0 := q.GetPromMetrics()
+			if err0 != nil {
 				level.Error(c.logger).Log(
 					"msg", "Get samples fail",
-					"err", err,
+					"err", err0,
 					"metric", q.Metric.Id,
 				)
+				// NOTE(review): err is the function's named result and is assigned
+				// here from every query goroutine without synchronization.
+				err = err0
 			} else {
 				for _, pm := range pms {
 					ch <- pm
diff --git a/pkg/metric/repository.go b/pkg/metric/repository.go
index e274e05..402481a 100644
--- a/pkg/metric/repository.go
+++ b/pkg/metric/repository.go
@@ -3,6 +3,7 @@ package metric
 import (
 	"context"
 	"fmt"
+	"strings"
 	"time"
 
 	"github.com/tencentyun/tencentcloud-exporter/pkg/util"
@@ -134,15 +135,12 @@ func (repo *TcmMetricRepositoryImpl) GetSamples(s *TcmSeries, st int64, et int64
 		request.EndTime = &etStr
 	}
 
+	start := time.Now()
 	response := &v20180724.GetMonitorDataResponse{}
-	if repo.IsInternational && s.Metric.Meta.ProductName == "QAAP" {
-		response, err = repo.monitorClientInSinapore.GetMonitorData(request)
-	} else if util.IsStrInList(config.QcloudNamespace, s.Metric.Meta.ProductName) {
-		response, err = repo.monitorClientInGuangzhou.GetMonitorData(request)
-	} else {
-		response, err = repo.monitorClient.GetMonitorData(request)
-	}
+	response, err = repo.getMonitorDataWithRetry(s.Metric.Meta.ProductName, request)
 	if err != nil {
+		level.Error(repo.logger).Log(
+			"request start time ", stStr, "duration ", time.Since(start).Seconds(), "err ", err.Error())
 		return
 	}
 
@@ -157,6 +155,34 @@ func (repo *TcmMetricRepositoryImpl) GetSamples(s *TcmSeries, st int64, et int64
 	return
 }
 
+// getMonitorDataWithRetry sends request through the monitor client selected
+// for productName and makes at most three attempts. Only errors whose message
+// contains context.DeadlineExceeded's text are retried, immediately and
+// without backoff; any other error is returned at once. If every attempt
+// times out, the last timeout error is returned.
+func (repo *TcmMetricRepositoryImpl) getMonitorDataWithRetry(
+	productName string, request *monitor.GetMonitorDataRequest) (*v20180724.GetMonitorDataResponse, error) {
+	var lastErr error
+	monitorClient := repo.monitorClient
+	if repo.IsInternational && productName == "QAAP" {
+		monitorClient = repo.monitorClientInSinapore
+	} else if util.IsStrInList(config.QcloudNamespace, productName) {
+		monitorClient = repo.monitorClientInGuangzhou
+	}
+	for i := 0; i < 3; i++ {
+		resp, err := monitorClient.GetMonitorData(request)
+		if err != nil {
+			if strings.Contains(err.Error(), context.DeadlineExceeded.Error()) {
+				lastErr = err
+				continue
+			}
+			return nil, err
+		}
+		return resp, nil
+	}
+	return nil, lastErr
+}
+
 func (repo *TcmMetricRepositoryImpl) ListSamples(m *TcmMetric, st int64, et int64) ([]*TcmSamples, error) {
 	var samplesList []*TcmSamples
 	for _, seriesList := range m.GetSeriesSplitByBatch(repo.queryMetricBatchSize) {
@@ -188,17 +214,17 @@ func (repo *TcmMetricRepositoryImpl) listSampleByBatch(
 
 	request := repo.buildGetMonitorDataRequest(m, seriesList, st, et)
 
+	start := time.Now()
 	response := &v20180724.GetMonitorDataResponse{}
-	if repo.IsInternational && m.Meta.ProductName == "QAAP" {
-		response, err = repo.monitorClientInSinapore.GetMonitorData(request)
-	} else if util.IsStrInList(config.QcloudNamespace, m.Meta.ProductName) {
-		response, err = repo.monitorClientInGuangzhou.GetMonitorData(request)
-	} else {
-		response, err = repo.monitorClient.GetMonitorData(request)
-	}
+	response, err = repo.getMonitorDataWithRetry(m.Meta.ProductName, request)
 	if err != nil {
+		level.Error(repo.logger).Log(
+			"request start time ", *request.StartTime,
+			"duration ", time.Since(start).Seconds(),
+			"err ", err.Error())
 		return nil, err
 	}
+
 	for _, points := range response.Response.DataPoints {
 		samples, ql, e := repo.buildSamples(m, points)
 		if e != nil {