Skip to content

Commit

Permalink
osd: Try other PGs when reservation failures occur
Browse files Browse the repository at this point in the history
Fixes: https://tracker.ceph.com/issues/48843

Signed-off-by: David Zafman <dzafman@redhat.com>
  • Loading branch information
dzafman committed Mar 5, 2021
1 parent ea41874 commit 08c3ede
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 2 deletions.
3 changes: 3 additions & 0 deletions PendingReleaseNotes
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@
is enabled. This helps with the monitor logs on larger clusters, that may get
many 'osd.X reported immediately failed by osd.Y' messages, and confuse tools.

* Scubs are more aggressive in trying to find more simultaneous possible PGs within osd_max_scrubs limitation.
It is possible that increasing osd_scrub_sleep may be necessary to maintain client responsiveness.

>=15.0.0
--------

Expand Down
34 changes: 32 additions & 2 deletions src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7617,6 +7617,13 @@ void OSD::sched_scrub()
continue;
}

// If this one couldn't reserve, skip for now
if (pg->get_reserve_failed()) {
pg->unlock();
dout(20) << __func__ << " pg " << scrub_job.pgid << " reserve failed, skipped" << dendl;
continue;
}

// This has already started, so go on to the next scrub job
if (pg->is_scrub_active()) {
pg->unlock();
Expand All @@ -7636,7 +7643,7 @@ void OSD::sched_scrub()
if (pg->m_scrubber->is_reserving()) {
pg->unlock();
dout(10) << __func__ << ": reserve in progress pgid " << scrub_job.pgid << dendl;
break;
goto out;
}
dout(15) << "sched_scrub scrubbing " << scrub_job.pgid << " at " << scrub_job.sched_time
<< (pg->get_must_scrub() ? ", explicitly requested" :
Expand All @@ -7645,11 +7652,34 @@ void OSD::sched_scrub()
if (pg->sched_scrub()) {
pg->unlock();
dout(10) << __func__ << " scheduled a scrub!" << " (~" << scrub_job.pgid << "~)" << dendl;
break;
goto out;
}
// If this is set now we must have had a local reserve failure, so can't scrub anything right now
if (pg->get_reserve_failed()) {
pg->unlock();
dout(20) << __func__ << " pg " << scrub_job.pgid << " local reserve failed, nothing to be done now" << dendl;
goto out;
}

pg->unlock();
} while (service.next_scrub_stamp(scrub_job, &scrub_job));

// Clear reserve_failed from all pending PGs, so we try again
if (service.first_scrub_stamp(&scrub_job)) {
do {
if (scrub_job.sched_time > now)
break;
PGRef pg = _lookup_lock_pg(scrub_job.pgid);
// If we can't lock, it's ok we can get it next time
if (!pg)
continue;
pg->clear_reserve_failed();
pg->unlock();
} while (service.next_scrub_stamp(scrub_job, &scrub_job));
}
}

out:
dout(20) << "sched_scrub done" << dendl;
}

Expand Down
1 change: 1 addition & 0 deletions src/osd/PG.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1362,6 +1362,7 @@ bool PG::sched_scrub()
// be retried by the OSD later on.
if (!m_scrubber->reserve_local()) {
dout(10) << __func__ << ": failed to reserve locally" << dendl;
set_reserve_failed();
return false;
}

Expand Down
4 changes: 4 additions & 0 deletions src/osd/PG.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {
/// scrubbing state for both Primary & replicas
bool is_scrub_active() const { return m_scrubber->is_scrub_active(); }

bool get_reserve_failed() const { return m_scrubber->get_reserve_failed(); }
void set_reserve_failed() { m_scrubber->set_reserve_failed(); }
void clear_reserve_failed() { m_scrubber->clear_reserve_failed(); }

public:
// -- members --
const coll_t coll;
Expand Down
7 changes: 7 additions & 0 deletions src/osd/pg_scrubber.h
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,10 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {

[[nodiscard]] bool is_scrub_active() const final { return m_active; }

[[nodiscard]] bool get_reserve_failed() const final { return m_reserve_failed; }
void set_reserve_failed() final { m_reserve_failed = true; }
void clear_reserve_failed() final { m_reserve_failed = false; }

private:
void reset_internal_state();

Expand Down Expand Up @@ -536,6 +540,9 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {

bool m_active{false};

// This PG could not get all the scrub reservations
bool m_reserve_failed{false};

eversion_t m_subset_last_update{};

std::unique_ptr<Scrub::Store> m_store;
Expand Down
2 changes: 2 additions & 0 deletions src/osd/scrub_machine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ sc::result ReservingReplicas::react(const ReservationFailure&)
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(10) << "ReservingReplicas::react(const ReservationFailure&)" << dendl;

// Mark PG so that we will try other PGs, before coming back to this one
scrbr->set_reserve_failed();
// the Scrubber must release all resources and abort the scrubbing
scrbr->clear_pgscrub_state();
return transit<NotActive>();
Expand Down
4 changes: 4 additions & 0 deletions src/osd/scrub_machine_lstnr.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ struct ScrubMachineListener {

virtual void unreserve_replicas() = 0;

[[nodiscard]] virtual bool get_reserve_failed() const = 0;
virtual void set_reserve_failed() = 0;
virtual void clear_reserve_failed() = 0;

/**
* the FSM interface into the "are we waiting for maps, either our own or from
* replicas" state.
Expand Down
4 changes: 4 additions & 0 deletions src/osd/scrubber_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ struct ScrubPgIF {
*/
[[nodiscard]] virtual bool is_scrub_active() const = 0;

[[nodiscard]] virtual bool get_reserve_failed() const = 0;
virtual void set_reserve_failed() = 0;
virtual void clear_reserve_failed() = 0;

/// are we waiting for resource reservation grants form our replicas?
[[nodiscard]] virtual bool is_reserving() const = 0;

Expand Down

0 comments on commit 08c3ede

Please sign in to comment.