From 635f643333e572990f8274e8270ce1d84b5603c2 Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Fri, 26 Sep 2025 11:08:43 -0700 Subject: [PATCH 1/3] Invalid database entry on failed login Signed-off-by: Anders Swanson --- alertlog/alertlog.go | 4 ++++ collector/collector.go | 6 ++++++ collector/database.go | 26 ++++++++++++++++++++++++++ collector/types.go | 2 ++ 4 files changed, 38 insertions(+) diff --git a/alertlog/alertlog.go b/alertlog/alertlog.go index 6b887d40..d201072f 100644 --- a/alertlog/alertlog.go +++ b/alertlog/alertlog.go @@ -24,6 +24,10 @@ type LogRecord struct { var databaseFailures map[string]int = map[string]int{} func UpdateLog(logDestination string, logger *slog.Logger, d *collector.Database) { + // Do not try to query the alert log if the database configuration is invalid. + if !d.IsValid() { + return + } queryFailures := databaseFailures[d.Name] if queryFailures == 3 { diff --git a/collector/collector.go b/collector/collector.go index c7c34e28..5767e975 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -237,6 +237,12 @@ func (e *Exporter) scheduledScrape(tick *time.Time) { } func (e *Exporter) scrapeDatabase(ch chan<- prometheus.Metric, errChan chan<- error, d *Database, tick *time.Time) int { + // If the database configuration is invalid, do not attempt to ping or reestablish the database connection. 
+ if !d.IsValid() { + e.logger.Warn("Invalid database configuration, will not attempt reconnection", "database", d.Name) + errChan <- fmt.Errorf("database %s is invalid, will not be scraped", d.Name) + return 1 + } // If ping fails, we will try again on the next iteration of metrics scraping if err := d.ping(e.logger); err != nil { e.logger.Error("Error pinging database", "error", err, "database", d.Name) diff --git a/collector/database.go b/collector/database.go index 90cdc2f8..9f5fe21f 100644 --- a/collector/database.go +++ b/collector/database.go @@ -6,6 +6,7 @@ package collector import ( "context" "database/sql" + "errors" "fmt" "github.com/godror/godror" "github.com/godror/godror/dsn" @@ -49,6 +50,10 @@ func (d *Database) ping(logger *slog.Logger) error { err := d.Session.PingContext(ctx) if err != nil { d.Up = 0 + if isInvalidCredentialsError(err) { + d.invalidate() + return err + } // If database is closed, try to reconnect if strings.Contains(err.Error(), "sql: database is closed") { db, dbtype := connect(logger, d.Name, d.Config) @@ -83,6 +88,7 @@ func NewDatabase(logger *slog.Logger, dbname string, dbconfig DatabaseConfig) *D Session: db, Type: dbtype, Config: dbconfig, + Valid: true, } } @@ -127,6 +133,26 @@ func (d *Database) WarmupConnectionPool(logger *slog.Logger) { } } +func (d *Database) IsValid() bool { + return d.Valid +} + +func (d *Database) invalidate() { + d.Valid = false +} + +func isInvalidCredentialsError(err error) bool { + err = errors.Unwrap(err) + if err == nil { + return false + } + oraErr, ok := err.(*godror.OraErr) + if !ok { + return false + } + return oraErr.Code() == 1017 +} + func connect(logger *slog.Logger, dbname string, dbconfig DatabaseConfig) (*sql.DB, float64) { logger.Debug("Launching connection to "+maskDsn(dbconfig.URL), "database", dbname) diff --git a/collector/types.go b/collector/types.go index 12f2a85b..003149bc 100644 --- a/collector/types.go +++ b/collector/types.go @@ -34,6 +34,8 @@ type Database struct { // 
MetricsCache holds computed metrics for a database, so these metrics are available on each scrape. // Given a metric's scrape configuration, it may not be computed on the same interval as other metrics. MetricsCache *MetricsCache + + Valid bool } type MetricsCache struct { From b4b968214ef882aa2693003b6a42ffd93338852a Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Fri, 26 Sep 2025 11:12:33 -0700 Subject: [PATCH 2/3] Invalid database entry on failed login Signed-off-by: Anders Swanson --- site/docs/releases/changelog.md | 1 + site/docs/releases/roadmap.md | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/site/docs/releases/changelog.md b/site/docs/releases/changelog.md index 33cd41b8..e3641f04 100644 --- a/site/docs/releases/changelog.md +++ b/site/docs/releases/changelog.md @@ -13,6 +13,7 @@ Our current priorities to support metrics for advanced database features and use - Updated project dependencies. - Standardize multi-arch builds and document supported database versions. +- If the exporter fails to connect to a database due to invalid credentials (ORA-01017 error), that database configuration will be invalidated and the exporter will not attempt to re-establish the database connection. Other databases will continue to be scraped. - Metrics with an empty databases array (`databases = []`) are now considered disabled, and will not be scraped. - Increased the default query timeout for the `top_sql` metric to 10 seconds (previously 5 seconds). - Metrics using the `scrapeinterval` property will no longer be scraped on every request if they have a cached value. This only applies when the metrics exporter is configured to scrape metrics _on request_, rather than on a global interval. 
diff --git a/site/docs/releases/roadmap.md b/site/docs/releases/roadmap.md index b19aa0ec..a1b89425 100644 --- a/site/docs/releases/roadmap.md +++ b/site/docs/releases/roadmap.md @@ -12,10 +12,10 @@ We welcome input on community-driven features you'd like to see supported. Pleas Currently, we plan to address the following key features: - Provide default Oracle Exadata metrics -- Implement connection storm protection: prevent the exporter from repeatedly connecting when the credentials fail, to prevent a storm of connections causing accounts to be locked across a large number of databases +- Provide default GoldenGate metrics +- Enhance database alert logging and alert log metrics - Provide the option to have the Oracle client outside of the container image, e.g., on a shared volume, - Implement the ability to update the configuration dynamically, i.e., without a restart - Implement support for tracing within the database, e.g., using an execution context ID provide by an external caller - Provide additional pre-built Grafana dashboards, - Integration with Spring Observability, e.g., Micrometer -- Provide additional documentation and samples From e659d48790e371f17f0e031411a60d9ac782ad30 Mon Sep 17 00:00:00 2001 From: Anders Swanson Date: Fri, 26 Sep 2025 11:15:16 -0700 Subject: [PATCH 3/3] Invalid database entry on failed login Signed-off-by: Anders Swanson --- collector/database.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/collector/database.go b/collector/database.go index 9f5fe21f..5ef3894d 100644 --- a/collector/database.go +++ b/collector/database.go @@ -16,6 +16,10 @@ import ( "time" ) +const ( + ora01017code = 1017 +) + func (d *Database) UpMetric(exporterLabels map[string]string) prometheus.Metric { desc := prometheus.NewDesc( prometheus.BuildFQName(namespace, "", "up"), @@ -150,7 +154,7 @@ func isInvalidCredentialsError(err error) bool { if !ok { return false } - return oraErr.Code() == 1017 + return oraErr.Code() == ora01017code } 
func connect(logger *slog.Logger, dbname string, dbconfig DatabaseConfig) (*sql.DB, float64) {