Skip to content

Commit

Permalink
fix: -ve metrics and return early if isLate (#495)
Browse files Browse the repository at this point in the history
Signed-off-by: Vigith Maurice <vigith@gmail.com>
Signed-off-by: Yashash H L <yashashhl25@gmail.com>
Co-authored-by: Yashash H L <yashashhl25@gmail.com>
  • Loading branch information
vigith and yhl25 committed Jan 19, 2023
1 parent 111f2b3 commit a3024f4
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 17 deletions.
21 changes: 14 additions & 7 deletions pkg/reduce/pbq/store/wal/stores.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func NewWALStores(vertexInstance *dfv1.VertexInstance, opts ...Option) store.Sto
return s
}

func (ws *walStores) CreateStore(ctx context.Context, partitionID partition.ID) (store.Store, error) {
func (ws *walStores) CreateStore(_ context.Context, partitionID partition.ID) (store.Store, error) {
// Create the WAL dir if it does not exist
var err error
if _, err = os.Stat(ws.storePath); os.IsNotExist(err) {
Expand All @@ -77,6 +77,17 @@ func (ws *walStores) CreateStore(ctx context.Context, partitionID partition.ID)
func (ws *walStores) openOrCreateWAL(id *partition.ID) (*WAL, error) {
var err error

defer func() {
// increment active WAL count only if we are able to successfully create/open one.
if err == nil {
activeFilesCount.With(map[string]string{
metrics.LabelPipeline: ws.pipelineName,
metrics.LabelVertex: ws.vertexName,
metrics.LabelVertexReplicaIndex: strconv.Itoa(int(ws.replicaIndex)),
}).Inc()
}
}()

filePath := getSegmentFilePath(id, ws.storePath)
stat, err := os.Stat(filePath)

Expand All @@ -89,16 +100,12 @@ func (ws *walStores) openOrCreateWAL(id *partition.ID) (*WAL, error) {
if err != nil {
return nil, err
}
// we are interested only in the number of new files created
filesCount.With(map[string]string{
metrics.LabelPipeline: ws.pipelineName,
metrics.LabelVertex: ws.vertexName,
metrics.LabelVertexReplicaIndex: strconv.Itoa(int(ws.replicaIndex)),
}).Inc()
activeFilesCount.With(map[string]string{
metrics.LabelPipeline: ws.pipelineName,
metrics.LabelVertex: ws.vertexName,
metrics.LabelVertexReplicaIndex: strconv.Itoa(int(ws.replicaIndex)),
}).Inc()
wal = &WAL{
fp: fp,
openMode: os.O_WRONLY,
Expand Down Expand Up @@ -151,7 +158,7 @@ func (ws *walStores) openOrCreateWAL(id *partition.ID) (*WAL, error) {
return wal, err
}

func (ws *walStores) DiscoverPartitions(ctx context.Context) ([]partition.ID, error) {
func (ws *walStores) DiscoverPartitions(_ context.Context) ([]partition.ID, error) {
files, err := os.ReadDir(ws.storePath)
if os.IsNotExist(err) {
return []partition.ID{}, nil
Expand Down
21 changes: 11 additions & 10 deletions pkg/reduce/readloop/readloop.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,17 @@ func (rl *ReadLoop) writeMessagesToWindows(ctx context.Context, messages []*isb.

messagesLoop:
for _, message := range messages {
// drop the late messages
if message.IsLate {
rl.log.Warnw("Dropping the late message", zap.Time("eventTime", message.EventTime), zap.Time("watermark", message.Watermark))
droppedMessagesCount.With(map[string]string{
metrics.LabelVertex: rl.vertexName,
metrics.LabelPipeline: rl.pipelineName,
metrics.LabelVertexReplicaIndex: strconv.Itoa(int(rl.vertexReplica)),
LabelReason: "late"}).Inc()
continue
}

// NOTE(potential bug): if we get a message where the event time is < watermark, skip processing the message.
// This could be due to a couple of problems, e.g. ack was not registered, etc.
// Please do not confuse this with late data! This is a platform related problem causing the watermark inequality
Expand Down Expand Up @@ -368,16 +379,6 @@ func (rl *ReadLoop) ShutDown(ctx context.Context) {
// upsertWindowsAndKeys will create or assign (if already present) a window to the message. It is an upsert operation
// because windows are created out of order, but they will be closed in-order.
func (rl *ReadLoop) upsertWindowsAndKeys(m *isb.ReadMessage) []window.AlignedKeyedWindower {
// drop the late messages
if m.IsLate {
rl.log.Warnw("Dropping the late message", zap.Time("eventTime", m.EventTime), zap.Time("watermark", m.Watermark))
droppedMessagesCount.With(map[string]string{
metrics.LabelVertex: rl.vertexName,
metrics.LabelPipeline: rl.pipelineName,
metrics.LabelVertexReplicaIndex: strconv.Itoa(int(rl.vertexReplica)),
LabelReason: "late"}).Inc()
return []window.AlignedKeyedWindower{}
}

processingWindows := rl.windower.AssignWindow(m.EventTime)
var kWindows []window.AlignedKeyedWindower
Expand Down

0 comments on commit a3024f4

Please sign in to comment.