Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[core] log dedup should not dedup number only lines #45485

Merged
merged 3 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions python/ray/_private/ray_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,16 @@ def deduplicate(self, batch: LogBatch) -> List[LogBatch]:
continue
dedup_key = _canonicalise_log_line(line)

if dedup_key == "":
# Don't dedup messages that are empty after canonicalization.
# Because that's all the information users want to see.
output[0]["lines"].append(line)
continue

if dedup_key in self.recent:
sources = self.recent[dedup_key].sources
sources.add(source)
# We deduplicate the warnings/errorm essages from
# raylet by default.
# We deduplicate the warnings/error messages from raylet by default.
if len(sources) > 1 or batch["pid"] == "raylet":
state = self.recent[dedup_key]
self.recent[dedup_key] = DedupState(
Expand Down
54 changes: 54 additions & 0 deletions python/ray/tests/test_log_dedup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,60 @@ def test_nodedup_logs_single_process():
assert out1 == [batch1]


def test_nodedup_logs_buffer_only_lines():
now = 142300000.0

def gettime():
return now

dedup = LogDeduplicator(5, None, None, _timesource=gettime)
batch1 = {
"ip": "node1",
"pid": 100,
# numbers are canonicalised, so this would lead to empty dedup_key
"lines": ["1"],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since we are testing LogDeduplicator can you add more duplicate lines? like 2 then 2 and all are outputed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}

# Immediately prints always.
out1 = dedup.deduplicate(batch1)
assert out1 == [batch1]

now += 1.0

# Should print new lines even if it is number only again
batch2 = {
"ip": "node2",
"pid": 200,
"lines": ["2"],
}
out2 = dedup.deduplicate(batch2)
assert out2 == [
{
"ip": "node2",
"pid": 200,
"lines": ["2"],
}
]

now += 3.0

# Should print new lines even if it is same number
batch3 = {
"ip": "node3",
"pid": 300,
"lines": ["2"],
}
# Should buffer duplicates.
out3 = dedup.deduplicate(batch3)
assert out3 == [
{
"ip": "node3",
"pid": 300,
"lines": ["2"],
}
]


def test_dedup_logs_multiple_processes():
now = 142300000.0

Expand Down
Loading