Skip to content

Commit

Permalink
osd: Cancel backfill when can't proceed due to errors
Browse files Browse the repository at this point in the history
Add a new transition, CancelBackfill (Backfilling -> NotBackfilling).
When giving up on backfill due to errors, use the new transition,
which includes scheduling a retry of backfill.

Signed-off-by: David Zafman <dzafman@redhat.com>
  • Loading branch information
dzafman committed Jun 23, 2017
1 parent a3cd5e6 commit e708410
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9124,9 +9124,16 @@ void OSD::do_recovery(
if (!more && pg->have_unfound()) {
pg->discover_all_missing(*rctx.query_map);
if (rctx.query_map->empty()) {
dout(10) << "do_recovery no luck, giving up on this pg for now" << dendl;
dout(10) << __func__ << ": no luck, giving up on this pg for now" << dendl;
if (pg->state_test(PG_STATE_BACKFILL)) {
auto evt = PG::CephPeeringEvtRef(new PG::CephPeeringEvt(
queued,
queued,
PG::CancelBackfill()));
pg->queue_peering_event(evt);
}
} else {
dout(10) << "do_recovery no luck, giving up on this pg for now" << dendl;
dout(10) << __func__ << ": no luck, giving up on this pg for now" << dendl;
pg->queue_recovery();
}
}
Expand Down
31 changes: 31 additions & 0 deletions src/osd/PG.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6273,6 +6273,37 @@ PG::RecoveryState::Backfilling::Backfilling(my_context ctx)
pg->publish_stats_to_osd();
}

/**
 * React to a CancelBackfill event received while in the Backfilling state.
 *
 * Cancels this PG's reservation with the local reserver, sends a
 * MBackfillReserve::REJECT message to every backfill target for which a
 * cluster connection can be obtained, clears waiting_on_backfill, schedules
 * a backfill retry via schedule_backfill_full_retry(), and then transitions
 * to NotBackfilling.
 */
boost::statechart::result
PG::RecoveryState::Backfilling::react(const CancelBackfill &)
{
  PG *pg = context< RecoveryMachine >().pg;

  // Cancel this PG's reservation with the local reserver.
  pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
  // XXX: Add a new pg state so user can see why backfill isn't proceeding
  // Can't use PG_STATE_BACKFILL_WAIT since it means waiting for reservations
  //pg->state_set(PG_STATE_BACKFILL_STALLED????);

  // Send a REJECT to each backfill target we can reach.
  for (const auto &target : pg->backfill_targets) {
    // This shard must never appear among its own backfill targets.
    assert(target != pg->pg_whoami);

    ConnectionRef con = pg->osd->get_con_osd_cluster(
      target.osd, pg->get_osdmap()->get_epoch());
    if (!con) {
      // No connection to this target; skip it.
      continue;
    }

    MBackfillReserve *reject = new MBackfillReserve(
      MBackfillReserve::REJECT,
      spg_t(pg->info.pgid.pgid, target.shard),
      pg->get_osdmap()->get_epoch());
    pg->osd->send_message_osd_cluster(reject, con.get());
  }

  pg->waiting_on_backfill.clear();

  // Arrange for backfill to be retried before leaving the state.
  pg->schedule_backfill_full_retry();
  return transit<NotBackfilling>();
}

boost::statechart::result
PG::RecoveryState::Backfilling::react(const RemoteReservationRejected &)
{
Expand Down
3 changes: 3 additions & 0 deletions src/osd/PG.h
Original file line number Diff line number Diff line change
Expand Up @@ -1560,6 +1560,7 @@ class PG : public DoutPrefixProvider {
TrivialEvent(LocalBackfillReserved)
TrivialEvent(RemoteBackfillReserved)
TrivialEvent(RemoteReservationRejected)
TrivialEvent(CancelBackfill)
TrivialEvent(RequestBackfill)
TrivialEvent(RequestRecovery)
TrivialEvent(RecoveryDone)
Expand Down Expand Up @@ -1871,10 +1872,12 @@ class PG : public DoutPrefixProvider {
// Backfilling: a sub-state of Active in the recovery state machine.
//
// Reactions:
//   - Backfilled                -> plain transition to Recovered
//   - CancelBackfill            -> custom reaction (see react overload)
//   - RemoteReservationRejected -> custom reaction (see react overload)
struct Backfilling : boost::statechart::state< Backfilling, Active >, NamedState {
  using reactions = boost::mpl::list<
    boost::statechart::transition< Backfilled, Recovered >,
    boost::statechart::custom_reaction< CancelBackfill >,
    boost::statechart::custom_reaction< RemoteReservationRejected >
    >;

  explicit Backfilling(my_context ctx);

  // Custom reaction handlers, listed in the same order as in `reactions`.
  boost::statechart::result react(const CancelBackfill& evt);
  boost::statechart::result react(const RemoteReservationRejected& evt);

  void exit();
};

Expand Down

0 comments on commit e708410

Please sign in to comment.