Skip to content

Commit

Permalink
PMM-12989 Reduce error logs from diagnostic data on arbiter nodes (#820)
Browse files Browse the repository at this point in the history
* PMM-12989 return early when checking diagnostic data for arbiter nodes

* fix lint

* bump test timeout

* skip diagnostic data if we can't run command

* remove unneeded test

* refactor tests

* drop timeout

* revert timeout

* changeset for tests

* change keyfile permission

Also, update actions workflow to output docker logs on failure

* change file permission before running

* bump permissions

* add entrypoint for keyfile

* use custom dockerfile for authenticated instances

* use only default cluster as shard server

* drop logs

* fix linter errors

* split metrics collection

* show warning earlier when running on arbiter

* show warning earlier when running on arbiter

* Update diagnostic_data_collector.go

* Update v1_compatibility.go

---------

Co-authored-by: Nurlan Moldomurov <bupychuk1989@gmail.com>
  • Loading branch information
idoqo and BupycHuk committed May 29, 2024
1 parent e39e36f commit 28e85f3
Show file tree
Hide file tree
Showing 9 changed files with 217 additions and 71 deletions.
12 changes: 9 additions & 3 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,18 +91,24 @@ It will install `goimports`, `goreleaser`, `golangci-lint` and `reviewdog`.

The testing sandbox starts `n` MongoDB instances as follows:

- 3 Instances for shard 1 at ports 17001, 17002, 17003
- 3 instances for shard 2 at ports 17004, 17005, 17006
- 3 instances for shard 1 at ports 17001, 17002, 17003 (with no authentication)
- 3 instances for shard 2 at ports 17004, 17005, 17006 (with authentication enabled)
- 3 config servers at ports 17007, 17008, 17009
- 1 mongos server at port 17000
- 1 standalone instance at port 27017

All instances are currently running without user and password so for example, to connect to the **mongos** you can just use:
To connect to the **mongos** on shard 1, you can use:

```
mongo mongodb://127.0.0.1:17001/admin
```

To connect to the **mongos** on shard 2 (with authentication enabled), you can use:

```
mongo mongodb://admin:admin@127.0.0.1:17004/admin
```

The sandbox can be started using the provided Makefile using: `make test-cluster` and it can be stopped using `make test-cluster-clean`.

### Running tests
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ test-race: env ## Run all tests with race flag.
go test -race -v -timeout 30s ./...

test-cluster: env ## Starts MongoDB test cluster. Use env var TEST_MONGODB_IMAGE to set flavor and version. Example: TEST_MONGODB_IMAGE=mongo:3.6 make test-cluster
docker compose up -d
docker compose up --build -d

test-cluster-clean: env ## Stops MongoDB test cluster.
docker compose down --remove-orphans
33 changes: 25 additions & 8 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,37 +63,52 @@ services:

mongo-2-2:
container_name: "mongo-2-2"
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
build:
dockerfile: ./docker/mongodb-auth.dockerfile
args:
TEST_MONGODB_IMAGE: ${TEST_MONGODB_IMAGE}
environment:
- MONGO_INITDB_ROOT_USERNAME=${TEST_MONGODB_USERNAME:-admin}
- MONGO_INITDB_ROOT_PASSWORD=${TEST_MONGODB_PASSWORD:-admin}
ports:
- "${TEST_MONGODB_S2_PRIMARY_PORT:-17004}:27017"
command: mongod --replSet rs2 --shardsvr --port 27017 --oplogSize 16
command: mongod --replSet rs2 --port 27017 --oplogSize 16 --auth --keyFile=/opt/keyfile
networks:
- rs2

mongo-2-3:
container_name: "mongo-2-3"
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
build:
dockerfile: ./docker/mongodb-auth.dockerfile
args:
TEST_MONGODB_IMAGE: ${TEST_MONGODB_IMAGE}
ports:
- "${TEST_MONGODB_S2_SECONDARY1_PORT:-17005}:27017"
command: mongod --replSet rs2 --shardsvr --port 27017 --oplogSize 16
command: mongod --replSet rs2 --port 27017 --oplogSize 16 --auth --keyFile=/opt/keyfile
networks:
- rs2

mongo-2-1:
container_name: "mongo-2-1"
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
build:
dockerfile: ./docker/mongodb-auth.dockerfile
args:
TEST_MONGODB_IMAGE: ${TEST_MONGODB_IMAGE}
ports:
- "${TEST_MONGODB_S2_SECONDARY2_PORT:-17006}:27017"
command: mongod --replSet rs2 --shardsvr --port 27017 --oplogSize 16
command: mongod --replSet rs2 --port 27017 --oplogSize 16 --auth --keyFile=/opt/keyfile
networks:
- rs2

mongo-2-arbiter:
container_name: "mongo-2-arbiter"
image: ${TEST_MONGODB_IMAGE:-mongo:4.2}
build:
dockerfile: ./docker/mongodb-auth.dockerfile
args:
TEST_MONGODB_IMAGE: ${TEST_MONGODB_IMAGE}
ports:
- "${TEST_MONGODB_S2_ARBITER:-17012}:27017"
command: mongod --replSet rs1 --shardsvr --port 27017 --oplogSize 16
command: mongod --replSet rs2 --port 27017 --oplogSize 16 --auth --keyFile=/opt/keyfile
networks:
- rs2

Expand All @@ -114,6 +129,8 @@ services:
- ARBITER=mongo-2-arbiter
- RS=rs2
- VERSION=${TEST_MONGODB_IMAGE}
- MONGO_INITDB_ROOT_USERNAME=${TEST_MONGODB_USERNAME:-admin}
- MONGO_INITDB_ROOT_PASSWORD=${TEST_MONGODB_PASSWORD:-admin}
entrypoint: [ "/scripts/setup.sh" ]
networks:
- rs2
Expand Down
6 changes: 6 additions & 0 deletions docker/mongodb-auth.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
ARG TEST_MONGODB_IMAGE=mongo:4.2
FROM ${TEST_MONGODB_IMAGE}
USER root
COPY docker/secret/keyfile /opt/keyfile
RUN chown mongodb /opt/keyfile && chmod 400 /opt/keyfile && mkdir -p /home/mongodb/ && chown mongodb /home/mongodb
USER mongodb
10 changes: 9 additions & 1 deletion docker/scripts/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ MONGODB_CLIENT="mongosh --quiet"
PARSED=(${VERSION//:/ })
MONGODB_VERSION=${PARSED[1]}
MONGODB_VENDOR=${PARSED[0]}

if [ "`echo ${MONGODB_VERSION} | cut -c 1`" = "4" ]; then
MONGODB_CLIENT="mongo"
fi
Expand All @@ -17,6 +18,9 @@ mongodb2=`getent hosts ${MONGO2} | awk '{ print $1 }'`
mongodb3=`getent hosts ${MONGO3} | awk '{ print $1 }'`
arbiter=`getent hosts ${ARBITER} | awk '{ print $1 }'`

username=${MONGO_INITDB_ROOT_USERNAME}
password=${MONGO_INITDB_ROOT_PASSWORD}

port=${PORT:-27017}

echo "Waiting for startup.."
Expand Down Expand Up @@ -60,7 +64,11 @@ EOF

function general_servers() {
echo "setup servers on ${MONGO1}(${mongodb1}:${port})"
${MONGODB_CLIENT} --host ${mongodb1}:${port} <<EOF
command="${MONGODB_CLIENT} --host ${mongodb1}:${port}"
if [[ -n "$username" && -n "$password" ]]; then
command="${MONGODB_CLIENT} --host ${mongodb1}:${port} -u ${username} -p ${password}"
fi
${command} <<EOF
var cfg = {
"_id": "${RS}",
"protocolVersion": 1,
Expand Down
16 changes: 16 additions & 0 deletions docker/secret/keyfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
P4CWhUl1qj2EiSM+r3mzAHSATiAMBv8l2rAClMrAqOqyyCzVd2tIFW0nOEbSRvoL
Ap7oZkC7IvjX5ELJnqQbxT00+dv1s8U1UFk3J96xro1x3mLgAmASJ1TrFSKXUkss
bbwZ6QBNONCrTNCqlL2q1umLiYUTPeUH0RlLg2GsH1H66a9dEKZaS5AtJIO1hJ6s
znivgdx2vnSA3bs1coR3nEBeNRsMoHp2Dtn9P6FDNlhssZh+EO9GdoRh16cqRiEM
V2VnS2vjEZPpcMrX3ZGU/JBy/bw5GhGXrIfzQrvuKanUsuAVA3bmh6Fh8Rnu66mA
GETO09R/3cjLxHBngZdL9Zn3+3HgDTQHKtpW0ZfjdiK5Xh4JqGXKtDr68S88qgAb
eYQaeJMBoe0o1JXeKYRba+fXmubO7nGN3IDYWulNtkJLe9bx1mseVqlh9FSg7CuG
++sszE7nzmtAlZlaLKlONMm9+dLjhT9n7cSkoWbSIUHvWIzGKC2yadabt7T21VPc
Cyxdaat28xo93J5qjUetjTLKWQmG4eL/AG2/plT7WrTOjQqYCjfFU3XmpVBgtaAr
wkJB8sOUbjcuVJok235Eu/8SxHJKQb0dl8ALwbwLG7CPZMrZ5YF6eJgjE+Sy+WKn
OqfyzNwS7j7nvOvdpRgnHUwwc0G6eIfj90S+XdieJMMuzt7NUS8NsCuQx3nAK0EB
FNWk02pUYE5yHWvmG+7Mo+Jd9OBWcQnGErux6NTYA42uYiMamZgsByBJ2Y8S2a2L
VtbJVhO2indAvGqrGNbdOKGKagFjIQP2A7zsvLFkgfwbBG785CI3Il9jaeQhcefY
Xs0cSSFhFkz/Ak17gU5l0WF6h5HHacWaCzmWdOiZVbOeZboaaxSBry+uZ8oRXPbp
BavuRtu1afTSrikgJdCcoA8Opn7kkETt5xa5PP1dNLEWIMFui2BOlRAw8BDWJ+nD
8fYBaVaqdDs/Y8K0xWHuJjja+s91
98 changes: 64 additions & 34 deletions exporter/diagnostic_data_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ type diagnosticDataCollector struct {

// newDiagnosticDataCollector creates a collector for diagnostic information.
func newDiagnosticDataCollector(ctx context.Context, client *mongo.Client, logger *logrus.Logger, compatible bool, topology labelsGetter) *diagnosticDataCollector {
nodeType, err := getNodeType(ctx, client)
if err != nil {
logger.WithFields(logrus.Fields{
"component": "diagnosticDataCollector",
}).Errorf("Cannot get node type: %s", err)
}
if nodeType == typeArbiter {
logger.WithFields(logrus.Fields{
"component": "diagnosticDataCollector",
}).Warn("some metrics might be unavailable on arbiter nodes")
}

return &diagnosticDataCollector{
ctx: ctx,
base: newBaseCollector(client, logger.WithFields(logrus.Fields{"collector": "diagnostic_data"})),
Expand All @@ -66,56 +78,74 @@ func (d *diagnosticDataCollector) collect(ch chan<- prometheus.Metric) {
logger := d.base.logger
client := d.base.client

nodeType, err := getNodeType(d.ctx, client)
if err != nil {
logger.WithFields(logrus.Fields{
"component": "diagnosticDataCollector",
}).Errorf("Cannot get node type: %s", err)
}

var metrics []prometheus.Metric
cmd := bson.D{{Key: "getDiagnosticData", Value: "1"}}
res := client.Database("admin").RunCommand(d.ctx, cmd)
if res.Err() != nil {
logger.Errorf("failed to run command: getDiagnosticData: %s", res.Err())
logger.Warn("cannot run getDiagnosticData, some metrics might be unavailable.")
}
if nodeType != typeArbiter {
logger.Warnf("failed to run command: getDiagnosticData, some metrics might be unavailable %s", res.Err())
}
} else {
if err := res.Decode(&m); err != nil {
logger.Errorf("cannot run getDiagnosticData: %s", err)
return
}

if err := res.Decode(&m); err != nil {
logger.Errorf("cannot run getDiagnosticData: %s", err)
}
if m == nil || m["data"] == nil {
logger.Error("cannot run getDiagnosticData: response is empty")
}

if m == nil || m["data"] == nil {
logger.Error("cannot run getDiagnosticData: response is empty")
}
var ok bool
m, ok = m["data"].(bson.M)
if !ok {
err = errors.Wrapf(errUnexpectedDataType, "%T for data field", m["data"])
logger.Errorf("cannot decode getDiagnosticData: %s", err)
}

m, ok := m["data"].(bson.M)
if !ok {
err := errors.Wrapf(errUnexpectedDataType, "%T for data field", m["data"])
logger.Debug("getDiagnosticData result")
debugResult(logger, m)

logger.Errorf("cannot decode getDiagnosticData: %s", err)
}
metrics = makeMetrics("", m, d.topologyInfo.baseLabels(), d.compatibleMode)
metrics = append(metrics, locksMetrics(logger, m)...)

logger.Debug("getDiagnosticData result")
debugResult(logger, m)
securityMetric, err := d.getSecurityMetricFromLineOptions(client)
if err != nil {
logger.Errorf("failed to run command: getCmdLineOptions: %s", err)
} else if securityMetric != nil {
metrics = append(metrics, securityMetric)
}

metrics := makeMetrics("", m, d.topologyInfo.baseLabels(), d.compatibleMode)
metrics = append(metrics, locksMetrics(logger, m)...)
if d.compatibleMode {
metrics = append(metrics, specialMetrics(d.ctx, client, m, nodeType, logger)...)

securityMetric, err := d.getSecurityMetricFromLineOptions(client)
if err != nil {
logger.Errorf("cannot decode getCmdLineOtpions: %s", err)
} else if securityMetric != nil {
metrics = append(metrics, securityMetric)
if cem, err := cacheEvictedTotalMetric(m); err == nil {
metrics = append(metrics, cem)
}
}
}

if d.compatibleMode {
logger.Debug("running special metrics for compatibility mode")
metrics = append(metrics, specialMetrics(d.ctx, client, m, logger)...)
buildInfo, err := retrieveMongoDBBuildInfo(d.ctx, client, logger)
if err != nil {
logger.Errorf("cannot retrieve MongoDB buildInfo: %s", err)
}

metrics = append(metrics, serverVersion(buildInfo))

if cem, err := cacheEvictedTotalMetric(m); err == nil {
metrics = append(metrics, cem)
if nodeType == typeArbiter {
if hm := arbiterMetrics(d.ctx, client, logger); hm != nil {
metrics = append(metrics, hm...)
}
}

nodeType, err := getNodeType(d.ctx, client)
if err != nil {
logger.WithFields(logrus.Fields{
"component": "diagnosticDataCollector",
}).Errorf("Cannot get node type to check if this is a mongos: %s", err)
} else if nodeType == typeMongos {
logger.Debug("running special metrics for mongos")
if nodeType == typeMongos {
metrics = append(metrics, mongosMetrics(d.ctx, client, logger)...)
}
}
Expand Down
78 changes: 78 additions & 0 deletions exporter/diagnostic_data_collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/sirupsen/logrus"
logrustest "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.mongodb.org/mongo-driver/bson"
Expand Down Expand Up @@ -236,6 +237,83 @@ func TestAllDiagnosticDataCollectorMetrics(t *testing.T) {
}
}

//nolint:funlen
func TestDiagnosticDataErrors(t *testing.T) {
t.Parallel()
type log struct {
message string
level uint32
}

type testCase struct {
name string
containerName string
expectedMessage string
}

cases := []testCase{
{
name: "authenticated arbiter has warning about missing metrics",
containerName: "mongo-2-arbiter",
expectedMessage: "some metrics might be unavailable on arbiter nodes",
},
{
name: "authenticated data node has no error in logs",
containerName: "mongo-1-1",
expectedMessage: "",
},
{
name: "unauthenticated arbiter has warning about missing metrics",
containerName: "mongo-1-arbiter",
expectedMessage: "some metrics might be unavailable on arbiter nodes",
},
}

for _, tc := range cases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
defer cancel()

port, err := tu.PortForContainer(tc.containerName)
require.NoError(t, err)
client := tu.TestClient(ctx, port, t)

logger, hook := logrustest.NewNullLogger()
ti := newTopologyInfo(ctx, client, logger)
c := newDiagnosticDataCollector(ctx, client, logger, true, ti)

reg := prometheus.NewRegistry()
err = reg.Register(c)
require.NoError(t, err)
_ = helpers.CollectMetrics(c)

var errorLogs []log
for _, entry := range hook.Entries {
if entry.Level == logrus.ErrorLevel || entry.Level == logrus.WarnLevel {
errorLogs = append(errorLogs, log{
message: entry.Message,
level: uint32(entry.Level),
})
}
}

if tc.expectedMessage == "" {
assert.Empty(t, errorLogs)
} else {
require.NotEmpty(t, errorLogs)
assert.True(
t,
strings.HasPrefix(hook.LastEntry().Message, tc.expectedMessage),
"'%s' has no prefix: '%s'",
hook.LastEntry().Message,
tc.expectedMessage)
}
})
}
}

func TestContextTimeout(t *testing.T) {
ctx := context.Background()

Expand Down
Loading

0 comments on commit 28e85f3

Please sign in to comment.