-
Couldn't load subscription status.
- Fork 130
fix(pegboard): revise actor rescheduling algorithm, add client metrics #2531
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -938,9 +938,19 @@ struct UpdateMetricsInput { | |||||||||||||||||
|
|
||||||||||||||||||
| #[activity(UpdateMetrics)] | ||||||||||||||||||
| async fn update_metrics(ctx: &ActivityCtx, input: &UpdateMetricsInput) -> GlobalResult<()> { | ||||||||||||||||||
| let (memory, cpu) = if input.clear { | ||||||||||||||||||
| (0, 0) | ||||||||||||||||||
| } else { | ||||||||||||||||||
| if input.clear { | ||||||||||||||||||
| metrics::CLIENT_MEMORY_ALLOCATED | ||||||||||||||||||
| .with_label_values(&[&input.client_id.to_string(), &input.flavor.to_string()]) | ||||||||||||||||||
| .set(0); | ||||||||||||||||||
|
|
||||||||||||||||||
| metrics::CLIENT_CPU_ALLOCATED | ||||||||||||||||||
| .with_label_values(&[&input.client_id.to_string(), &input.flavor.to_string()]) | ||||||||||||||||||
| .set(0); | ||||||||||||||||||
|
|
||||||||||||||||||
| return Ok(()); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| let (total_mem, total_cpu, remaining_mem, remaining_cpu) = | ||||||||||||||||||
| ctx.fdb() | ||||||||||||||||||
|
Comment on lines
+953
to
954
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. logic: Tuple order in destructuring doesn't match database query results. total_mem and total_cpu are swapped compared to query result order.
Suggested change
|
||||||||||||||||||
| .await? | ||||||||||||||||||
| .run(|tx, _mc| async move { | ||||||||||||||||||
|
|
@@ -983,21 +993,33 @@ async fn update_metrics(ctx: &ActivityCtx, input: &UpdateMetricsInput) -> Global | |||||||||||||||||
| .map_err(|x| fdb::FdbBindingError::CustomError(x.into()))?; | ||||||||||||||||||
|
|
||||||||||||||||||
| Ok(( | ||||||||||||||||||
| total_mem.saturating_sub(remaining_mem), | ||||||||||||||||||
| total_cpu.saturating_sub(remaining_cpu), | ||||||||||||||||||
| total_mem, | ||||||||||||||||||
| remaining_mem, | ||||||||||||||||||
| total_cpu, | ||||||||||||||||||
| remaining_cpu, | ||||||||||||||||||
|
Comment on lines
+996
to
+999
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. logic: Tuple return order doesn't match variable names above. Should be (total_mem, total_cpu, remaining_mem, remaining_cpu) to match destructuring.
Suggested change
|
||||||||||||||||||
| )) | ||||||||||||||||||
| }) | ||||||||||||||||||
| .custom_instrument(tracing::info_span!("client_update_metrics_tx")) | ||||||||||||||||||
| .await? | ||||||||||||||||||
| }; | ||||||||||||||||||
| .await?; | ||||||||||||||||||
|
|
||||||||||||||||||
| metrics::CLIENT_CPU_ALLOCATED | ||||||||||||||||||
| metrics::CLIENT_MEMORY_TOTAL | ||||||||||||||||||
| .with_label_values(&[&input.client_id.to_string(), &input.flavor.to_string()]) | ||||||||||||||||||
| .set(total_mem.try_into()?); | ||||||||||||||||||
|
|
||||||||||||||||||
| metrics::CLIENT_CPU_TOTAL | ||||||||||||||||||
| .with_label_values(&[&input.client_id.to_string(), &input.flavor.to_string()]) | ||||||||||||||||||
| .set(cpu.try_into()?); | ||||||||||||||||||
| .set(total_cpu.try_into()?); | ||||||||||||||||||
|
|
||||||||||||||||||
| let allocated_mem = total_mem.saturating_sub(remaining_mem); | ||||||||||||||||||
| let allocated_cpu = total_cpu.saturating_sub(remaining_cpu); | ||||||||||||||||||
|
|
||||||||||||||||||
| metrics::CLIENT_MEMORY_ALLOCATED | ||||||||||||||||||
| .with_label_values(&[&input.client_id.to_string(), &input.flavor.to_string()]) | ||||||||||||||||||
| .set(memory.try_into()?); | ||||||||||||||||||
| .set(allocated_mem.try_into()?); | ||||||||||||||||||
|
|
||||||||||||||||||
| metrics::CLIENT_CPU_ALLOCATED | ||||||||||||||||||
| .with_label_values(&[&input.client_id.to_string(), &input.flavor.to_string()]) | ||||||||||||||||||
| .set(allocated_cpu.try_into()?); | ||||||||||||||||||
|
|
||||||||||||||||||
| Ok(()) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
logic: Potential race condition - state.last_retry_ts is updated after the check, which could lead to incorrect retry count calculations if multiple retries happen very close together