-
Notifications
You must be signed in to change notification settings - Fork 5.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[core] Periodically GC metadata for streaming generators #43772
Changes from 4 commits
19efd2b
00dacb3
6f25782
12e2725
a9d1d62
b1be839
abdd963
cd8dafb
78b463c
f2901f5
b4c9512
1fc4e72
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -641,6 +641,11 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_ | |
RayConfig::instance().metrics_report_interval_ms() / 2, | ||
"CoreWorker.RecordMetrics"); | ||
|
||
periodical_runner_.RunFnPeriodically( | ||
[this] { TryDeleteObjectRefStreams(); }, | ||
RayConfig::instance().local_gc_min_interval_s() * 1000, | ||
"CoreWorker.GCStreamingGeneratorMetadata"); | ||
|
||
#ifndef _WIN32 | ||
// Doing this last during CoreWorker initialization, so initialization logic like | ||
// registering with Raylet can finish with higher priority. | ||
|
@@ -2963,8 +2968,33 @@ Status CoreWorker::SealReturnObject(const ObjectID &return_id, | |
return status; | ||
} | ||
|
||
void CoreWorker::DelObjectRefStream(const ObjectID &generator_id) { | ||
task_manager_->DelObjectRefStream(generator_id); | ||
void CoreWorker::AsyncDelObjectRefStream(const ObjectID &generator_id) { | ||
RAY_LOG(DEBUG) << "AsyncDelObjectRefStream " << generator_id; | ||
deleted_generator_ids_.insert(generator_id); | ||
} | ||
|
||
void CoreWorker::TryDeleteObjectRefStreams() { | ||
std::vector<ObjectID> out_of_scope_generator_ids; | ||
for (auto it = deleted_generator_ids_.begin(); it != deleted_generator_ids_.end(); | ||
it++) { | ||
const auto &generator_id = *it; | ||
RAY_LOG(DEBUG) << "Try DelObjectRefStream " << generator_id; | ||
int64_t num_objects_generated = 0; | ||
if (!task_manager_->StreamingGeneratorIsFinished(generator_id, | ||
&num_objects_generated)) { | ||
continue; | ||
stephanie-wang marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
bool can_gc = reference_counter_->CheckGeneratorRefsOutOfScope(generator_id, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we remove all unconsumed objects here (and add a test)? This seems like a regression (where unconsumed objects are not deleted if lineage is alive). E.g., when the delete happens, all unconsumed objects are deleted (same behavior as now), and only the stream metadata is cleaned up after a delay. |
||
num_objects_generated); | ||
if (can_gc && task_manager_->DelObjectRefStream(generator_id)) { | ||
out_of_scope_generator_ids.push_back(generator_id); | ||
} | ||
} | ||
|
||
for (const auto &generator_id : out_of_scope_generator_ids) { | ||
deleted_generator_ids_.erase(generator_id); | ||
} | ||
} | ||
|
||
Status CoreWorker::TryReadObjectRefStream(const ObjectID &generator_id, | ||
|
@@ -2977,8 +3007,8 @@ Status CoreWorker::TryReadObjectRefStream(const ObjectID &generator_id, | |
return status; | ||
} | ||
|
||
bool CoreWorker::IsFinished(const ObjectID &generator_id) const { | ||
return task_manager_->IsFinished(generator_id); | ||
bool CoreWorker::StreamingGeneratorIsFinished(const ObjectID &generator_id) const { | ||
return task_manager_->StreamingGeneratorIsFinished(generator_id); | ||
} | ||
|
||
std::pair<rpc::ObjectReference, bool> CoreWorker::PeekObjectRefStream( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like the wrong value? (It is already in seconds, but you multiply it by 1000.)
Besides, do you think 10 seconds is good enough? I feel like it needs to be a little more frequent.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The multiplication by 1000 is to convert seconds to ms.
Should be fine because we trigger it once immediately now.