Skip to content

Commit

Permalink
Merge pull request #8 from smartnews/spot-metrics
Browse files Browse the repository at this point in the history
chore: add spot interruption metrics with zone and instance type
  • Loading branch information
Luke-Smartnews committed May 8, 2024
2 parents dd236c8 + 8f90524 commit 1f59c19
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/controllers/interruption/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ func (c *Controller) handleNodeClaim(ctx context.Context, msg messages.Message,
if zone != "" && instanceType != "" {
c.unavailableOfferingsCache.MarkUnavailable(ctx, string(msg.Kind()), instanceType, zone, v1beta1.CapacityTypeSpot)
}
spotTotal.WithLabelValues(instanceType, zone).Inc()
// try to create a new nodeclaim immediately but ignore error if it fails
if err := c.createNodeClaim(ctx, nodeClaim); err != nil {
logging.FromContext(ctx).Errorf("[interruption handling]failed to create a new nodeclaim, %v", err)
Expand Down
11 changes: 11 additions & 0 deletions pkg/controllers/interruption/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ const (
messageTypeLabel = "message_type"
actionTypeLabel = "action_type"
terminationReasonLabel = "interruption"
instanceTypeLabel = "instance_type"
zoneLabel = "zone"
)

var (
Expand Down Expand Up @@ -64,6 +66,15 @@ var (
},
[]string{actionTypeLabel},
)
// spotTotal is a counter vector for spot interruptions, registered under
// metrics.Namespace and the interruption subsystem with the name "spot_total".
//
// Its label names are declared in the order (instanceTypeLabel, zoneLabel),
// i.e. ("instance_type", "zone"). Callers of WithLabelValues must supply the
// values in that same order; note the Help text lists them the other way
// round ("AZ, instance type").
//
// NOTE(review): at the visible call site in handleNodeClaim the increment
// happens outside the `zone != "" && instanceType != ""` guard, so series with
// empty label values appear possible — confirm this is intended.
spotTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metrics.Namespace,
Subsystem: interruptionSubsystem,
Name: "spot_total",
Help: "Number of the spot interruption. Labeled by AZ, instance type",
},
// Order here fixes the positional order expected by WithLabelValues.
[]string{instanceTypeLabel, zoneLabel},
)
)

func init() {
Expand Down

0 comments on commit 1f59c19

Please sign in to comment.