diff --git a/.travis.yml b/.travis.yml index 84d6193d2..43b7234d4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,6 +14,7 @@ env: - MONGODB_IMAGE=mongo:3.4 - MONGODB_IMAGE=mongo:3.6 - MONGODB_IMAGE=percona/percona-server-mongodb:3.4 + - MONGODB_IMAGE=perconalab/percona-server-mongodb:3.6 services: - docker diff --git a/CHANGELOG.md b/CHANGELOG.md index bbef27928..d99174f30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,15 @@ ## v0.4.0 (not released yet) +* New flags `-collect.database` and `-collect.collection` can be used to enable collection of database and collection + metrics. They are disabled by default. +* MongoDB connections are now kept between the scrapes. New flag `-mongodb.max-connections` (with the default value `1`) + controls the maximum number of established connections. +* Add standard metrics: + * `mongodb_exporter_scrape_errors_total` + * `mongodb_up` +* Some queries now contain [cursor comments](https://www.percona.com/blog/2017/06/21/tracing-mongodb-queries-to-code-with-cursor-comments/) + with source code locations. * Go vendoring switched to [dep](https://github.com/golang/dep). ## v0.3.1 (2017-09-08) diff --git a/README.md b/README.md index b57cb21c4..d1cfb60f1 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,12 @@ db.getSiblingDB("admin").createUser({ export MONGODB_URL=mongodb://mongodb_exporter:s3cr3tpassw0rd@localhost:27017 ``` +If you use [x.509 Certificates to Authenticate Clients](https://docs.mongodb.com/manual/tutorial/configure-x509-client-authentication/), pass in username and `authMechanism` via [connection options](https://docs.mongodb.com/manual/reference/connection-string/#connections-connection-options) to the MongoDB uri. 
Eg: + +``` +mongodb://CN=myName,OU=myOrgUnit,O=myOrg,L=myLocality,ST=myState,C=myCountry@localhost:27017/?authMechanism=MONGODB-X509 +``` + ## Note about how this works Point the process to any mongo port and it will detect if it is a mongos, replicaset member, or stand alone mongod and return the appropriate metrics for that type of node. This was done to preent the need to an exporter per type of process. diff --git a/collector/mongod/collections_status.go b/collector/mongod/collections_status.go new file mode 100644 index 000000000..eb650d832 --- /dev/null +++ b/collector/mongod/collections_status.go @@ -0,0 +1,126 @@ +package collector_mongod + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + "gopkg.in/mgo.v2" + "gopkg.in/mgo.v2/bson" +) + +var ( + collectionSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "size", + Help: "The total size in memory of all records in a collection", + }, []string{"db", "coll"}) + collectionObjectCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "count", + Help: "The number of objects or documents in this collection", + }, []string{"db", "coll"}) + collectionAvgObjSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "avgobjsize", + Help: "The average size of an object in the collection (plus any padding)", + }, []string{"db", "coll"}) + collectionStorageSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "storage_size", + Help: "The total amount of storage allocated to this collection for document storage", + }, []string{"db", "coll"}) + collectionIndexes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "indexes", + Help: "The number of indexes on the collection", + }, []string{"db", "coll"}) + 
collectionIndexesSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "indexes_size", + Help: "The total size of all indexes", + }, []string{"db", "coll"}) +) + +// CollectionStatList contains stats from all collections +type CollectionStatList struct { + Members []CollectionStatus +} + +// CollectionStatus represents stats about a collection in database (mongod and raw from mongos) +type CollectionStatus struct { + Database string + Name string + Size int `bson:"size,omitempty"` + Count int `bson:"count,omitempty"` + AvgObjSize int `bson:"avgObjSize,omitempty"` + StorageSize int `bson:"storageSize,omitempty"` + Indexes int `bson:"indexSizes,omitempty"` + IndexesSize int `bson:"totalIndexSize,omitempty"` +} + +// Export exports collection stats to prometheus +func (collStatList *CollectionStatList) Export(ch chan<- prometheus.Metric) { + for _, member := range collStatList.Members { + ls := prometheus.Labels{ + "db": member.Database, + "coll": member.Name, + } + collectionSize.With(ls).Set(float64(member.Size)) + collectionObjectCount.With(ls).Set(float64(member.Count)) + collectionAvgObjSize.With(ls).Set(float64(member.AvgObjSize)) + collectionStorageSize.With(ls).Set(float64(member.StorageSize)) + collectionIndexes.With(ls).Set(float64(member.Indexes)) + collectionIndexesSize.With(ls).Set(float64(member.IndexesSize)) + } + collectionSize.Collect(ch) + collectionObjectCount.Collect(ch) + collectionAvgObjSize.Collect(ch) + collectionStorageSize.Collect(ch) + collectionIndexes.Collect(ch) + collectionIndexesSize.Collect(ch) +} + +// Describe describes collection stats for prometheus +func (collStatList *CollectionStatList) Describe(ch chan<- *prometheus.Desc) { + collectionSize.Describe(ch) + collectionObjectCount.Describe(ch) + collectionAvgObjSize.Describe(ch) + collectionStorageSize.Describe(ch) + collectionIndexes.Describe(ch) + collectionIndexesSize.Describe(ch) +} + +// GetCollectionStatList returns stats for all collections in
all databases +func GetCollectionStatList(session *mgo.Session) *CollectionStatList { + collectionStatList := &CollectionStatList{} + database_names, err := session.DatabaseNames() + if err != nil { + log.Error("Failed to get database names") + return nil + } + for _, db := range database_names { + collection_names, err := session.DB(db).CollectionNames() + if err != nil { + log.Error("Failed to get collection names for db=" + db) + return nil + } + for _, collection_name := range collection_names { + collStatus := CollectionStatus{} + err := session.DB(db).Run(bson.D{{"collStats", collection_name}, {"scale", 1}}, &collStatus) + collStatus.Database = db + collStatus.Name = collection_name + if err != nil { + log.Error("Failed to get collection status.") + return nil + } + collectionStatList.Members = append(collectionStatList.Members, collStatus) + } + } + + return collectionStatList +} diff --git a/collector/mongod/database_status.go b/collector/mongod/database_status.go new file mode 100644 index 000000000..db1d54a18 --- /dev/null +++ b/collector/mongod/database_status.go @@ -0,0 +1,104 @@ +package collector_mongod + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + "gopkg.in/mgo.v2" + "gopkg.in/mgo.v2/bson" +) + +var ( + indexSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db", + Name: "index_size_bytes", + Help: "The total size in bytes of all indexes created on this database", + }, []string{"db"}) + dataSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db", + Name: "data_size_bytes", + Help: "The total size in bytes of the uncompressed data held in this database", + }, []string{"db"}) + collectionsTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db", + Name: "collections_total", + Help: "Contains a count of the number of collections in that database", + }, []string{"db"}) + indexesTotal = 
prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db", + Name: "indexes_total", + Help: "Contains a count of the total number of indexes across all collections in the database", + }, []string{"db"}) + objectsTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db", + Name: "objects_total", + Help: "Contains a count of the number of objects (i.e. documents) in the database across all collections", + }, []string{"db"}) +) + +// DatabaseStatList contains stats from all databases +type DatabaseStatList struct { + Members []DatabaseStatus +} + +// DatabaseStatus represents stats about a database (mongod and raw from mongos) +type DatabaseStatus struct { + Name string `bson:"db,omitempty"` + IndexSize int `bson:"indexSize,omitempty"` + DataSize int `bson:"dataSize,omitempty"` + Collections int `bson:"collections,omitempty"` + Objects int `bson:"objects,omitempty"` + Indexes int `bson:"indexes,omitempty"` +} + +// Export exports database stats to prometheus +func (dbStatList *DatabaseStatList) Export(ch chan<- prometheus.Metric) { + for _, member := range dbStatList.Members { + ls := prometheus.Labels{"db": member.Name} + indexSize.With(ls).Set(float64(member.IndexSize)) + dataSize.With(ls).Set(float64(member.DataSize)) + collectionsTotal.With(ls).Set(float64(member.Collections)) + indexesTotal.With(ls).Set(float64(member.Indexes)) + objectsTotal.With(ls).Set(float64(member.Objects)) + } + indexSize.Collect(ch) + dataSize.Collect(ch) + collectionsTotal.Collect(ch) + indexesTotal.Collect(ch) + objectsTotal.Collect(ch) + +} + +// Describe describes database stats for prometheus +func (dbStatList *DatabaseStatList) Describe(ch chan<- *prometheus.Desc) { + indexSize.Describe(ch) + dataSize.Describe(ch) + collectionsTotal.Describe(ch) + indexesTotal.Describe(ch) + objectsTotal.Describe(ch) +} + +// GetDatabaseStatList returns stats for all databases +func GetDatabaseStatList(session *mgo.Session) 
*DatabaseStatList { + dbStatList := &DatabaseStatList{} + database_names, err := session.DatabaseNames() + if err != nil { + log.Error("Failed to get database names") + return nil + } + for _, db := range database_names { + dbStatus := DatabaseStatus{} + err := session.DB(db).Run(bson.D{{"dbStats", 1}, {"scale", 1}}, &dbStatus) + if err != nil { + log.Error("Failed to get database status.") + return nil + } + dbStatList.Members = append(dbStatList.Members, dbStatus) + } + + return dbStatList +} diff --git a/collector/mongod/global_lock.go b/collector/mongod/global_lock.go index 7934006c1..f30ead94c 100644 --- a/collector/mongod/global_lock.go +++ b/collector/mongod/global_lock.go @@ -31,12 +31,6 @@ var ( Name: "total", Help: "The value of totalTime represents the time, in microseconds, since the database last started and creation of the globalLock. This is roughly equivalent to total server uptime", }) - globalLockLockTotal = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: Namespace, - Subsystem: "global_lock", - Name: "lock_total", - Help: "The value of lockTime represents the time, in microseconds, since the database last started, that the globalLock has been held", - }) ) var ( globalLockCurrentQueue = prometheus.NewGaugeVec(prometheus.GaugeOpts{ diff --git a/collector/mongod/oplog_status.go b/collector/mongod/oplog_status.go index 92609486f..68a15dc79 100644 --- a/collector/mongod/oplog_status.go +++ b/collector/mongod/oplog_status.go @@ -19,9 +19,13 @@ import ( "github.com/prometheus/common/log" "gopkg.in/mgo.v2" "gopkg.in/mgo.v2/bson" + + "github.com/percona/mongodb_exporter/shared" ) var ( + oplogDb = "local" + oplogCollection = "oplog.rs" oplogStatusCount = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: Namespace, Subsystem: "replset_oplog", @@ -69,44 +73,34 @@ func BsonMongoTimestampToUnix(timestamp bson.MongoTimestamp) float64 { return float64(timestamp >> 32) } -func GetOplogTimestamps(session *mgo.Session) (*OplogTimestamps, error) { - 
oplogTimestamps := &OplogTimestamps{} - var err error - - // retry once if there is an error - var tries int64 = 0 - var head_result struct { +func getOplogTailOrHeadTimestamp(session *mgo.Session, returnHead bool) (float64, error) { + var result struct { Timestamp bson.MongoTimestamp `bson:"ts"` } - for tries < 2 { - err = session.DB("local").C("oplog.rs").Find(nil).Sort("-$natural").Limit(1).One(&head_result) - if err == nil { - break - } - tries += 1 - } - if err != nil { - return oplogTimestamps, err - } - // retry once if there is an error - tries = 0 - var tail_result struct { - Timestamp bson.MongoTimestamp `bson:"ts"` + var sortCond string = "$natural" + if returnHead { + sortCond = "-$natural" } - for tries < 2 { - err = session.DB("local").C("oplog.rs").Find(nil).Sort("$natural").Limit(1).One(&tail_result) - if err == nil { - break - } - tries += 1 + + findQuery := session.DB(oplogDb).C(oplogCollection).Find(nil).Sort(sortCond).Limit(1) + err := shared.AddCodeCommentToQuery(findQuery).One(&result) + return BsonMongoTimestampToUnix(result.Timestamp), err +} + +func GetOplogTimestamps(session *mgo.Session) (*OplogTimestamps, error) { + headTs, err := getOplogTailOrHeadTimestamp(session, true) + if err != nil { + return nil, err } + tailTs, err := getOplogTailOrHeadTimestamp(session, false) if err != nil { - return oplogTimestamps, err + return nil, err + } + oplogTimestamps := &OplogTimestamps{ + Head: headTs, + Tail: tailTs, } - - oplogTimestamps.Tail = BsonMongoTimestampToUnix(tail_result.Timestamp) - oplogTimestamps.Head = BsonMongoTimestampToUnix(head_result.Timestamp) return oplogTimestamps, err } diff --git a/collector/mongod/replset_status.go b/collector/mongod/replset_status.go index 677cb0da6..57ade8453 100644 --- a/collector/mongod/replset_status.go +++ b/collector/mongod/replset_status.go @@ -115,12 +115,6 @@ var ( Name: "member_config_version", Help: "The configVersion value is the replica set configuration version.", }, []string{"set", "name", 
"state"}) - memberOptime = prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: Namespace, - Subsystem: subsystem, - Name: "member_optime", - Help: "Information regarding the last operation from the operation log that this member has applied.", - }, []string{"set", "name", "state"}) ) // ReplSetStatus keeps the data returned by the GetReplSetStatus method diff --git a/collector/mongodb_collector.go b/collector/mongodb_collector.go index e7f90ad13..5d5918624 100644 --- a/collector/mongodb_collector.go +++ b/collector/mongodb_collector.go @@ -15,8 +15,8 @@ package collector import ( - "errors" "fmt" + "sync" "time" "github.com/prometheus/client_golang/prometheus" @@ -32,12 +32,15 @@ const namespace = "mongodb" // MongodbCollectorOpts is the options of the mongodb collector. type MongodbCollectorOpts struct { - URI string - TLSConnection bool - TLSCertificateFile string - TLSPrivateKeyFile string - TLSCaFile string - TLSHostnameValidation bool + URI string + TLSConnection bool + TLSCertificateFile string + TLSPrivateKeyFile string + TLSCaFile string + TLSHostnameValidation bool + DBPoolLimit int + CollectDatabaseMetrics bool + CollectCollectionMetrics bool } func (in MongodbCollectorOpts) toSessionOps() shared.MongoSessionOpts { @@ -48,15 +51,22 @@ func (in MongodbCollectorOpts) toSessionOps() shared.MongoSessionOpts { TLSPrivateKeyFile: in.TLSPrivateKeyFile, TLSCaFile: in.TLSCaFile, TLSHostnameValidation: in.TLSHostnameValidation, + PoolLimit: in.DBPoolLimit, } } // MongodbCollector is in charge of collecting mongodb's metrics. type MongodbCollector struct { - Opts MongodbCollectorOpts + Opts MongodbCollectorOpts + scrapesTotal prometheus.Counter + scrapeErrorsTotal prometheus.Counter lastScrapeError prometheus.Gauge lastScrapeDurationSeconds prometheus.Gauge + mongoUp prometheus.Gauge + + mongoSessLock sync.Mutex + mongoSess *mgo.Session } // NewMongodbCollector returns a new instance of a MongodbCollector. 
@@ -70,6 +80,12 @@ func NewMongodbCollector(opts MongodbCollectorOpts) *MongodbCollector { Name: "scrapes_total", Help: "Total number of times MongoDB was scraped for metrics.", }), + scrapeErrorsTotal: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: "exporter", + Name: "scrape_errors_total", + Help: "Total number of times an error occurred scraping a MongoDB.", + }), lastScrapeError: prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: namespace, Subsystem: "exporter", @@ -82,11 +98,41 @@ func NewMongodbCollector(opts MongodbCollectorOpts) *MongodbCollector { Name: "last_scrape_duration_seconds", Help: "Duration of the last scrape of metrics from MongoDB.", }), + mongoUp: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "up", + Help: "Whether MongoDB is up.", + }), } return exporter } +// getSession returns the cached *mgo.Session or creates a new session and returns it. +// Use sync.Mutex to avoid race condition around session creation. +func (exporter *MongodbCollector) getSession() *mgo.Session { + exporter.mongoSessLock.Lock() + defer exporter.mongoSessLock.Unlock() + + if exporter.mongoSess == nil { + exporter.mongoSess = shared.MongoSession(exporter.Opts.toSessionOps()) + } + if exporter.mongoSess == nil { + return nil + } + return exporter.mongoSess.Copy() +} + +// Close cleanly closes the mongo session if it exists. +func (exporter *MongodbCollector) Close() { + exporter.mongoSessLock.Lock() + defer exporter.mongoSessLock.Unlock() + + if exporter.mongoSess != nil { + exporter.mongoSess.Close() + } +} + // Describe sends the super-set of all possible descriptors of metrics collected by this Collector // to the provided channel and returns once the last descriptor has been sent. // Part of prometheus.Collector interface. 
@@ -123,8 +169,10 @@ func (exporter *MongodbCollector) Collect(ch chan<- prometheus.Metric) { exporter.scrape(ch) exporter.scrapesTotal.Collect(ch) + exporter.scrapeErrorsTotal.Collect(ch) exporter.lastScrapeError.Collect(ch) exporter.lastScrapeDurationSeconds.Collect(ch) + exporter.mongoUp.Collect(ch) } func (exporter *MongodbCollector) scrape(ch chan<- prometheus.Metric) { @@ -135,13 +183,16 @@ func (exporter *MongodbCollector) scrape(ch chan<- prometheus.Metric) { if err == nil { exporter.lastScrapeError.Set(0) } else { + exporter.scrapeErrorsTotal.Inc() exporter.lastScrapeError.Set(1) } }(time.Now()) - mongoSess := shared.MongoSession(exporter.Opts.toSessionOps()) + mongoSess := exporter.getSession() if mongoSess == nil { - err = errors.New("can't create mongo session") + err = fmt.Errorf("Can't create mongo session to %s", exporter.Opts.URI) + log.Error(err) + exporter.mongoUp.Set(0) return } defer mongoSess.Close() @@ -150,8 +201,10 @@ func (exporter *MongodbCollector) scrape(ch chan<- prometheus.Metric) { serverVersion, err = shared.MongoSessionServerVersion(mongoSess) if err != nil { log.Errorf("Problem gathering the mongo server version: %s", err) + exporter.mongoUp.Set(0) return } + exporter.mongoUp.Set(1) var nodeType string nodeType, err = shared.MongoSessionNodeType(mongoSess) @@ -189,6 +242,22 @@ func (exporter *MongodbCollector) collectMongos(session *mgo.Session, ch chan<- if shardingStatus != nil { shardingStatus.Export(ch) } + + if exporter.Opts.CollectDatabaseMetrics { + log.Debug("Collecting Database Status From Mongos") + dbStatList := collector_mongos.GetDatabaseStatList(session) + if dbStatList != nil { + dbStatList.Export(ch) + } + } + + if exporter.Opts.CollectCollectionMetrics { + log.Debug("Collecting Collection Status From Mongos") + collStatList := collector_mongos.GetCollectionStatList(session) + if collStatList != nil { + collStatList.Export(ch) + } + } } func (exporter *MongodbCollector) collectMongod(session *mgo.Session, ch chan<- 
prometheus.Metric) { @@ -197,6 +266,22 @@ func (exporter *MongodbCollector) collectMongod(session *mgo.Session, ch chan<- if serverStatus != nil { serverStatus.Export(ch) } + + if exporter.Opts.CollectDatabaseMetrics { + log.Debug("Collecting Database Status From Mongod") + dbStatList := collector_mongod.GetDatabaseStatList(session) + if dbStatList != nil { + dbStatList.Export(ch) + } + } + + if exporter.Opts.CollectCollectionMetrics { + log.Debug("Collecting Collection Status From Mongod") + collStatList := collector_mongod.GetCollectionStatList(session) + if collStatList != nil { + collStatList.Export(ch) + } + } } func (exporter *MongodbCollector) collectMongodReplSet(session *mgo.Session, ch chan<- prometheus.Metric) { diff --git a/collector/mongos/collections_status.go b/collector/mongos/collections_status.go new file mode 100644 index 000000000..49789f5d5 --- /dev/null +++ b/collector/mongos/collections_status.go @@ -0,0 +1,126 @@ +package collector_mongos + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + "gopkg.in/mgo.v2" + "gopkg.in/mgo.v2/bson" +) + +var ( + collectionSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "size", + Help: "The total size in memory of all records in a collection", + }, []string{"db", "coll"}) + collectionObjectCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "count", + Help: "The number of objects or documents in this collection", + }, []string{"db", "coll"}) + collectionAvgObjSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "avgobjsize", + Help: "The average size of an object in the collection (plus any padding)", + }, []string{"db", "coll"}) + collectionStorageSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "storage_size", + Help: "The total 
amount of storage allocated to this collection for document storage", + }, []string{"db", "coll"}) + collectionIndexes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "indexes", + Help: "The number of indexes on the collection", + }, []string{"db", "coll"}) + collectionIndexesSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db_coll", + Name: "indexes_size", + Help: "The total size of all indexes", + }, []string{"db", "coll"}) +) + +// CollectionStatList contains stats from all collections +type CollectionStatList struct { + Members []CollectionStatus +} + +// CollectionStatus represents stats about a collection in database (mongod and raw from mongos) +type CollectionStatus struct { + Database string + Name string + Size int `bson:"size,omitempty"` + Count int `bson:"count,omitempty"` + AvgObjSize int `bson:"avgObjSize,omitempty"` + StorageSize int `bson:"storageSize,omitempty"` + Indexes int `bson:"indexSizes,omitempty"` + IndexesSize int `bson:"totalIndexSize,omitempty"` +} + +// Export exports collection stats to prometheus +func (collStatList *CollectionStatList) Export(ch chan<- prometheus.Metric) { + for _, member := range collStatList.Members { + ls := prometheus.Labels{ + "db": member.Database, + "coll": member.Name, + } + collectionSize.With(ls).Set(float64(member.Size)) + collectionObjectCount.With(ls).Set(float64(member.Count)) + collectionAvgObjSize.With(ls).Set(float64(member.AvgObjSize)) + collectionStorageSize.With(ls).Set(float64(member.StorageSize)) + collectionIndexes.With(ls).Set(float64(member.Indexes)) + collectionIndexesSize.With(ls).Set(float64(member.IndexesSize)) + } + collectionSize.Collect(ch) + collectionObjectCount.Collect(ch) + collectionAvgObjSize.Collect(ch) + collectionStorageSize.Collect(ch) + collectionIndexes.Collect(ch) + collectionIndexesSize.Collect(ch) +} + +// Describe describes collection stats for prometheus +func (collStatList 
*CollectionStatList) Describe(ch chan<- *prometheus.Desc) { + collectionSize.Describe(ch) + collectionObjectCount.Describe(ch) + collectionAvgObjSize.Describe(ch) + collectionStorageSize.Describe(ch) + collectionIndexes.Describe(ch) + collectionIndexesSize.Describe(ch) +} + +// GetCollectionStatList returns stats for all collections in all databases +func GetCollectionStatList(session *mgo.Session) *CollectionStatList { + collectionStatList := &CollectionStatList{} + database_names, err := session.DatabaseNames() + if err != nil { + log.Error("Failed to get database names") + return nil + } + for _, db := range database_names { + collection_names, err := session.DB(db).CollectionNames() + if err != nil { + log.Error("Failed to get collection names for db=" + db) + return nil + } + for _, collection_name := range collection_names { + collStatus := CollectionStatus{} + err := session.DB(db).Run(bson.D{{"collStats", collection_name}, {"scale", 1}}, &collStatus) + collStatus.Database = db + collStatus.Name = collection_name + if err != nil { + log.Error("Failed to get collection status.") + return nil + } + collectionStatList.Members = append(collectionStatList.Members, collStatus) + } + } + + return collectionStatList +} diff --git a/collector/mongos/database_status.go b/collector/mongos/database_status.go new file mode 100644 index 000000000..6cc74b5db --- /dev/null +++ b/collector/mongos/database_status.go @@ -0,0 +1,125 @@ +package collector_mongos + +import ( + "strings" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + "gopkg.in/mgo.v2" + "gopkg.in/mgo.v2/bson" +) + +var ( + indexSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db", + Name: "index_size_bytes", + Help: "The total size in bytes of all indexes created on this database", + }, []string{"db", "shard"}) + dataSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db", + Name: 
"data_size_bytes", + Help: "The 
total size in bytes of the uncompressed data held in this database", + }, []string{"db", "shard"}) + collectionsTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db", + Name: "collections_total", + Help: "Contains a count of the number of collections in that database", + }, []string{"db", "shard"}) + indexesTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db", + Name: "indexes_total", + Help: "Contains a count of the total number of indexes across all collections in the database", + }, []string{"db", "shard"}) + objectsTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: Namespace, + Subsystem: "db", + Name: "objects_total", + Help: "Contains a count of the number of objects (i.e. documents) in the database across all collections", + }, []string{"db", "shard"}) +) + +// DatabaseStatList contains stats from all databases +type DatabaseStatList struct { + Members []DatabaseStatus +} + +// DatabaseStatus represents stats about a database (mongod and raw from mongos) +type DatabaseStatus struct { + RawStatus // embed to collect top-level attributes + Shards map[string]*RawStatus `bson:"raw,omitempty"` +} + +// RawStatus represents stats about a database from Mongos side +type RawStatus struct { + Name string `bson:"db,omitempty"` + IndexSize int `bson:"indexSize,omitempty"` + DataSize int `bson:"dataSize,omitempty"` + Collections int `bson:"collections,omitempty"` + Objects int `bson:"objects,omitempty"` + Indexes int `bson:"indexes,omitempty"` +} + +// Export exports database stats to prometheus +func (dbStatList *DatabaseStatList) Export(ch chan<- prometheus.Metric) { + for _, member := range dbStatList.Members { + if len(member.Shards) > 0 { + for shard, stats := range member.Shards { + ls := prometheus.Labels{ + "db": stats.Name, + "shard": strings.Split(shard, "/")[0], + } + indexSize.With(ls).Set(float64(stats.IndexSize)) + 
dataSize.With(ls).Set(float64(stats.DataSize)) + collectionsTotal.With(ls).Set(float64(stats.Collections)) + indexesTotal.With(ls).Set(float64(stats.Indexes)) + objectsTotal.With(ls).Set(float64(stats.Objects)) + } + } + } + + indexSize.Collect(ch) + dataSize.Collect(ch) + collectionsTotal.Collect(ch) + indexesTotal.Collect(ch) + objectsTotal.Collect(ch) + + indexSize.Reset() + dataSize.Reset() + collectionsTotal.Reset() + indexesTotal.Reset() + objectsTotal.Reset() +} + +// Describe describes database stats for prometheus +func (dbStatList *DatabaseStatList) Describe(ch chan<- *prometheus.Desc) { + indexSize.Describe(ch) + dataSize.Describe(ch) + collectionsTotal.Describe(ch) + indexesTotal.Describe(ch) + objectsTotal.Describe(ch) +} + +// GetDatabaseStatList returns stats for all databases +func GetDatabaseStatList(session *mgo.Session) *DatabaseStatList { + dbStatList := &DatabaseStatList{} + database_names, err := session.DatabaseNames() + if err != nil { + log.Error("Failed to get database names") + return nil + } + for _, db := range database_names { + dbStatus := DatabaseStatus{} + err := session.DB(db).Run(bson.D{{"dbStats", 1}, {"scale", 1}}, &dbStatus) + if err != nil { + log.Error("Failed to get database status.") + return nil + } + dbStatList.Members = append(dbStatList.Members, dbStatus) + } + + return dbStatList +} diff --git a/collector/mongos/sharding_status.go b/collector/mongos/sharding_status.go index 2737b177e..b7daeb6f2 100644 --- a/collector/mongos/sharding_status.go +++ b/collector/mongos/sharding_status.go @@ -114,7 +114,7 @@ func IsBalancerEnabled(session *mgo.Session) float64 { if err != nil { return 1 } - if balancerConfig.Stopped == true { + if balancerConfig.Stopped { return 0 } return 1 diff --git a/collector/mongos/sharding_topology.go b/collector/mongos/sharding_topology.go index b458d762a..5f93865c7 100644 --- a/collector/mongos/sharding_topology.go +++ b/collector/mongos/sharding_topology.go @@ -132,7 +132,7 @@ func (status 
*ShardingTopoStats) Export(ch chan<- prometheus.Metric) { if status.Shards != nil { var drainingShards float64 = 0 for _, shard := range *status.Shards { - if shard.Draining == true { + if shard.Draining { drainingShards = drainingShards + 1 } } diff --git a/mongodb_exporter.go b/mongodb_exporter.go index 8f1c44e26..84b7eb5d4 100644 --- a/mongodb_exporter.go +++ b/mongodb_exporter.go @@ -52,6 +52,9 @@ var ( sslCertFileF = flag.String("web.ssl-cert-file", "", "Path to SSL certificate file.") sslKeyFileF = flag.String("web.ssl-key-file", "", "Path to SSL key file.") + collectDatabaseF = flag.Bool("collect.database", false, "Enable collection of Database metrics") + collectCollectionF = flag.Bool("collect.collection", false, "Enable collection of Collection metrics") + uriF = flag.String("mongodb.uri", mongodbDefaultURI(), "MongoDB URI, format: [mongodb://][user:pass@]host1[:port1][,host2[:port2],...][/database][?options]") tlsF = flag.Bool("mongodb.tls", false, "Enable tls connection with mongo server") tlsCertF = flag.String("mongodb.tls-cert", "", "Path to PEM file that contains the certificate (and optionally also the decrypted private key in PEM format).\n"+ @@ -62,6 +65,7 @@ var ( " \tIf provided: MongoDB servers connecting to should present a certificate signed by one of this CAs.\n"+ " \tIf not provided: System default CAs are used.") tlsDisableHostnameValidationF = flag.Bool("mongodb.tls-disable-hostname-validation", false, "Do hostname validation for server connection.") + maxConnectionsF = flag.Int("mongodb.max-connections", 1, "Max number of pooled connections to the database.") // FIXME currently ignored enabledGroupsFlag = flag.String("groups.enabled", "asserts,durability,background_flushing,connections,extra_info,global_lock,index_counters,network,op_counters,op_counters_repl,memory,locks,metrics", "Comma-separated list of groups to use, for more info see: docs.mongodb.org/manual/reference/command/serverStatus/") @@ -154,8 +158,8 @@ func 
startWebServer() { } handler := prometheusHandler() - - registerCollector() + collector := registerCollector() + defer collector.Close() if (*sslCertFileF == "") != (*sslKeyFileF == "") { log.Fatal("One of the flags -web.ssl-cert-file or -web.ssl-key-file is missing to enable HTTPS/TLS") @@ -210,16 +214,20 @@ func startWebServer() { } } -func registerCollector() { +func registerCollector() *collector.MongodbCollector { mongodbCollector := collector.NewMongodbCollector(collector.MongodbCollectorOpts{ - URI: *uriF, - TLSConnection: *tlsF, - TLSCertificateFile: *tlsCertF, - TLSPrivateKeyFile: *tlsPrivateKeyF, - TLSCaFile: *tlsCAF, - TLSHostnameValidation: !(*tlsDisableHostnameValidationF), + URI: *uriF, + TLSConnection: *tlsF, + TLSCertificateFile: *tlsCertF, + TLSPrivateKeyFile: *tlsPrivateKeyF, + TLSCaFile: *tlsCAF, + TLSHostnameValidation: !(*tlsDisableHostnameValidationF), + DBPoolLimit: *maxConnectionsF, + CollectDatabaseMetrics: *collectDatabaseF, + CollectCollectionMetrics: *collectCollectionF, }) prometheus.MustRegister(mongodbCollector) + return mongodbCollector } func main() { diff --git a/shared/connection.go b/shared/connection.go index 1aaa5acde..3f2c2f375 100644 --- a/shared/connection.go +++ b/shared/connection.go @@ -52,6 +52,7 @@ type MongoSessionOpts struct { TLSPrivateKeyFile string TLSCaFile string TLSHostnameValidation bool + PoolLimit int } func MongoSession(opts MongoSessionOpts) *mgo.Session { @@ -61,8 +62,10 @@ func MongoSession(opts MongoSessionOpts) *mgo.Session { return nil } - dialInfo.Direct = true // Force direct connection + // connect directly, fail faster, do not retry - for faster responses and accurate metrics, including mongoUp + dialInfo.Direct = true dialInfo.Timeout = dialMongodbTimeout + dialInfo.FailFast = true err = opts.configureDialInfoIfRequired(dialInfo) if err != nil { @@ -76,13 +79,15 @@ func MongoSession(opts MongoSessionOpts) *mgo.Session { return nil } session.SetMode(mgo.Eventual, true) + 
session.SetPoolLimit(opts.PoolLimit) + session.SetPrefetch(0.00) session.SetSyncTimeout(syncMongodbTimeout) - session.SetSocketTimeout(0) + session.SetSocketTimeout(dialMongodbTimeout) return session } func (opts MongoSessionOpts) configureDialInfoIfRequired(dialInfo *mgo.DialInfo) error { - if opts.TLSConnection == true { + if opts.TLSConnection { config := &tls.Config{ InsecureSkipVerify: !opts.TLSHostnameValidation, } diff --git a/shared/utils.go b/shared/utils.go index 4c8c71ecd..e3d87dd28 100644 --- a/shared/utils.go +++ b/shared/utils.go @@ -18,6 +18,10 @@ import ( "crypto/tls" "crypto/x509" "io/ioutil" + "runtime" + "strconv" + + "gopkg.in/mgo.v2" ) func LoadCaFrom(pemFile string) (*x509.CertPool, error) { @@ -37,3 +41,13 @@ func LoadKeyPairFrom(pemFile string, privateKeyPemFile string) (tls.Certificate, } return tls.LoadX509KeyPair(pemFile, targetPrivateKeyPemFile) } + +// AddCodeCommentToQuery adds location of the caller in the source code (e.g. "oplog_status.go:91") +// to the given query as a comment. +func AddCodeCommentToQuery(query *mgo.Query) *mgo.Query { + _, fileName, lineNum, ok := runtime.Caller(1) + if !ok { + return query + } + return query.Comment(fileName + ":" + strconv.Itoa(lineNum)) +}