Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions alertlog/alertlog.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ type LogRecord struct {
var databaseFailures map[string]int = map[string]int{}

func UpdateLog(logDestination string, logger *slog.Logger, d *collector.Database) {
// Do not try to query the alert log if the database configuration is invalid.
if !d.IsValid() {
return
}

queryFailures := databaseFailures[d.Name]
if queryFailures == 3 {
Expand Down
6 changes: 6 additions & 0 deletions collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,12 @@ func (e *Exporter) scheduledScrape(tick *time.Time) {
}

func (e *Exporter) scrapeDatabase(ch chan<- prometheus.Metric, errChan chan<- error, d *Database, tick *time.Time) int {
// If the database configuration is invalid, do not attempt to ping or reestablish the database connection.
if !d.IsValid() {
e.logger.Warn("Invalid database configuration, will not attempt reconnection", "database", d.Name)
errChan <- fmt.Errorf("database %s is invalid, will not be scraped", d.Name)
return 1
}
// If ping fails, we will try again on the next iteration of metrics scraping
if err := d.ping(e.logger); err != nil {
e.logger.Error("Error pinging database", "error", err, "database", d.Name)
Expand Down
30 changes: 30 additions & 0 deletions collector/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package collector
import (
"context"
"database/sql"
"errors"
"fmt"
"github.com/godror/godror"
"github.com/godror/godror/dsn"
Expand All @@ -15,6 +16,10 @@ import (
"time"
)

// ora01017code is the numeric code of the Oracle ORA-01017 error, which the
// exporter treats as an invalid-credentials failure.
const (
ora01017code = 1017
)

func (d *Database) UpMetric(exporterLabels map[string]string) prometheus.Metric {
desc := prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "up"),
Expand Down Expand Up @@ -49,6 +54,10 @@ func (d *Database) ping(logger *slog.Logger) error {
err := d.Session.PingContext(ctx)
if err != nil {
d.Up = 0
if isInvalidCredentialsError(err) {
d.invalidate()
return err
}
// If database is closed, try to reconnect
if strings.Contains(err.Error(), "sql: database is closed") {
db, dbtype := connect(logger, d.Name, d.Config)
Expand Down Expand Up @@ -83,6 +92,7 @@ func NewDatabase(logger *slog.Logger, dbname string, dbconfig DatabaseConfig) *D
Session: db,
Type: dbtype,
Config: dbconfig,
Valid: true,
}
}

Expand Down Expand Up @@ -127,6 +137,26 @@ func (d *Database) WarmupConnectionPool(logger *slog.Logger) {
}
}

// IsValid returns the current value of the database's Valid flag.
// Callers use it to skip work for a database whose configuration has been
// invalidated.
func (d *Database) IsValid() bool {
return d.Valid
}

// invalidate sets the database's Valid flag to false, so that IsValid
// subsequently returns false. ping calls it when the ping error is
// classified as an invalid-credentials error.
func (d *Database) invalidate() {
d.Valid = false
}

// isInvalidCredentialsError reports whether err is, or wraps at any depth, a
// *godror.OraErr whose code is ORA-01017 (invalid credentials).
//
// It returns false for a nil error and for any error whose chain does not
// contain a *godror.OraErr.
func isInvalidCredentialsError(err error) bool {
	// errors.As walks the entire wrap chain and also matches an unwrapped
	// *godror.OraErr; a single errors.Unwrap followed by a type assertion
	// would miss both the unwrapped and the multiply-wrapped cases.
	var oraErr *godror.OraErr
	if !errors.As(err, &oraErr) {
		return false
	}
	return oraErr.Code() == ora01017code
}

func connect(logger *slog.Logger, dbname string, dbconfig DatabaseConfig) (*sql.DB, float64) {
logger.Debug("Launching connection to "+maskDsn(dbconfig.URL), "database", dbname)

Expand Down
2 changes: 2 additions & 0 deletions collector/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ type Database struct {
// MetricsCache holds computed metrics for a database, so these metrics are available on each scrape.
// Given a metric's scrape configuration, it may not be computed on the same interval as other metrics.
MetricsCache *MetricsCache

Valid bool
}

type MetricsCache struct {
Expand Down
1 change: 1 addition & 0 deletions site/docs/releases/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Our current priorities to support metrics for advanced database features and use

- Updated project dependencies.
- Standardize multi-arch builds and document supported database versions.
- If the exporter fails to connect to a database due to invalid credentials (ORA-01017 error), that database configuration will be invalidated and the exporter will not attempt to re-establish the database connection. Other databases will continue to be scraped.
- Metrics with an empty databases array (`databases = []`) are now considered disabled, and will not be scraped.
- Increased the default query timeout for the `top_sql` metric to 10 seconds (previously 5 seconds).
- Metrics using the `scrapeinterval` property will no longer be scraped on every request if they have a cached value. This only applies when the metrics exporter is configured to scrape metrics _on request_, rather than on a global interval.
Expand Down
4 changes: 2 additions & 2 deletions site/docs/releases/roadmap.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ We welcome input on community-driven features you'd like to see supported. Pleas
Currently, we plan to address the following key features:

- Provide default Oracle Exadata metrics
- Implement connection storm protection: prevent the exporter from repeatedly connecting when the credentials fail, to prevent a storm of connections causing accounts to be locked across a large number of databases
- Provide default GoldenGate metrics
- Enhance database alert logging and alert log metrics
- Provide the option to have the Oracle client outside of the container image, e.g., on a shared volume
- Implement the ability to update the configuration dynamically, i.e., without a restart
- Implement support for tracing within the database, e.g., using an execution context ID provided by an external caller
- Provide additional pre-built Grafana dashboards
- Integration with Spring Observability, e.g., Micrometer
- Provide additional documentation and samples