Skip to content

Commit

Permalink
Merge pull request ceph#29857 from dzafman/wip-41255
Browse files Browse the repository at this point in the history
backfill_toofull seen on cluster where the most full OSD is at 1%

Reviewed-by: xie xingguo <xie.xingguo@zte.com.cn>
Reviewed-by: Neha Ojha <nojha@redhat.com>
  • Loading branch information
tchaikov committed Aug 29, 2019
2 parents 5a5d417 + 0115595 commit 94b8839
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 40 deletions.
22 changes: 11 additions & 11 deletions src/messages/MBackfillReserve.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ class MBackfillReserve : public MOSDPeeringOp {
enum {
REQUEST = 0, // primary->replica: please reserve a slot
GRANT = 1, // replica->primary: ok, i reserved it
REJECT = 2, // replica->primary: sorry, try again later (*)
REJECT_TOOFULL = 2, // replica->primary: too full, sorry, try again later (*)
RELEASE = 3, // primary->replica: release the slot i reserved before
TOOFULL = 4, // replica->primary: too full, stop backfilling
REVOKE_TOOFULL = 4, // replica->primary: too full, stop backfilling
REVOKE = 5, // replica->primary: i'm taking back the slot i gave you
// (*) NOTE: prior to luminous, REJECT was overloaded to also mean release
};
Expand Down Expand Up @@ -62,7 +62,7 @@ class MBackfillReserve : public MOSDPeeringOp {
query_epoch,
query_epoch,
RemoteBackfillReserved());
case REJECT:
case REJECT_TOOFULL:
// NOTE: this is replica -> primary "i reject your request"
// and also primary -> replica "cancel my previously-granted request"
// (for older peers)
Expand All @@ -71,13 +71,13 @@ class MBackfillReserve : public MOSDPeeringOp {
return new PGPeeringEvent(
query_epoch,
query_epoch,
RemoteReservationRejected());
RemoteReservationRejectedTooFull());
case RELEASE:
return new PGPeeringEvent(
query_epoch,
query_epoch,
RemoteReservationCanceled());
case TOOFULL:
case REVOKE_TOOFULL:
return new PGPeeringEvent(
query_epoch,
query_epoch,
Expand Down Expand Up @@ -118,14 +118,14 @@ class MBackfillReserve : public MOSDPeeringOp {
case GRANT:
out << "GRANT";
break;
case REJECT:
out << "REJECT ";
case REJECT_TOOFULL:
out << "REJECT_TOOFULL";
break;
case RELEASE:
out << "RELEASE";
break;
case TOOFULL:
out << "TOOFULL";
case REVOKE_TOOFULL:
out << "REVOKE_TOOFULL";
break;
case REVOKE:
out << "REVOKE";
Expand Down Expand Up @@ -158,8 +158,8 @@ class MBackfillReserve : public MOSDPeeringOp {
header.compat_version = 3;
encode(pgid.pgid, payload);
encode(query_epoch, payload);
encode((type == RELEASE || type == TOOFULL || type == REVOKE) ?
REJECT : type, payload);
encode((type == RELEASE || type == REVOKE_TOOFULL || type == REVOKE) ?
REJECT_TOOFULL : type, payload);
encode(priority, payload);
encode(pgid.shard, payload);
return;
Expand Down
2 changes: 1 addition & 1 deletion src/osd/PGPeeringEvent.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ struct RequestRecoveryPrio : boost::statechart::event< RequestRecoveryPrio > {

TrivialEvent(NullEvt)
TrivialEvent(RemoteBackfillReserved)
TrivialEvent(RemoteReservationRejected)
TrivialEvent(RemoteReservationRejectedTooFull)
TrivialEvent(RemoteReservationRevokedTooFull)
TrivialEvent(RemoteReservationRevoked)
TrivialEvent(RemoteReservationCanceled)
Expand Down
23 changes: 12 additions & 11 deletions src/osd/PeeringState.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,7 @@ void PeeringState::reject_reservation()
pl->send_cluster_message(
primary.osd,
new MBackfillReserve(
MBackfillReserve::REJECT,
MBackfillReserve::REJECT_TOOFULL,
spg_t(info.pgid.pgid, primary.shard),
get_osdmap_epoch()),
get_osdmap_epoch());
Expand Down Expand Up @@ -4553,7 +4553,6 @@ void PeeringState::WaitRemoteBackfillReserved::retry()
}

ps->state_clear(PG_STATE_BACKFILL_WAIT);
ps->state_set(PG_STATE_BACKFILL_TOOFULL);
pl->publish_stats_to_osd();

pl->schedule_event_after(
Expand All @@ -4565,8 +4564,10 @@ void PeeringState::WaitRemoteBackfillReserved::retry()
}

boost::statechart::result
PeeringState::WaitRemoteBackfillReserved::react(const RemoteReservationRejected &evt)
PeeringState::WaitRemoteBackfillReserved::react(const RemoteReservationRejectedTooFull &evt)
{
DECLARE_LOCALS;
ps->state_set(PG_STATE_BACKFILL_TOOFULL);
retry();
return transit<NotBackfilling>();
}
Expand Down Expand Up @@ -4626,7 +4627,7 @@ PeeringState::NotBackfilling::react(const RemoteBackfillReserved &evt)
}

boost::statechart::result
PeeringState::NotBackfilling::react(const RemoteReservationRejected &evt)
PeeringState::NotBackfilling::react(const RemoteReservationRejectedTooFull &evt)
{
return discard_event();
}
Expand Down Expand Up @@ -4670,11 +4671,11 @@ PeeringState::RepNotRecovering::RepNotRecovering(my_context ctx)
}

boost::statechart::result
PeeringState::RepNotRecovering::react(const RejectRemoteReservation &evt)
PeeringState::RepNotRecovering::react(const RejectTooFullRemoteReservation &evt)
{
DECLARE_LOCALS;
ps->reject_reservation();
post_event(RemoteReservationRejected());
post_event(RemoteReservationRejectedTooFull());
return discard_event();
}

Expand Down Expand Up @@ -4743,7 +4744,7 @@ PeeringState::RepNotRecovering::react(const RequestBackfillPrio &evt)

if (!pl->try_reserve_recovery_space(
evt.primary_num_bytes, evt.local_num_bytes)) {
post_event(RejectRemoteReservation());
post_event(RejectTooFullRemoteReservation());
} else {
PGPeeringEventRef preempt;
if (HAVE_FEATURE(ps->upacting_features, RECOVERY_RESERVATION_2)) {
Expand Down Expand Up @@ -4818,17 +4819,17 @@ PeeringState::RepWaitBackfillReserved::react(const RemoteBackfillReserved &evt)

boost::statechart::result
PeeringState::RepWaitBackfillReserved::react(
const RejectRemoteReservation &evt)
const RejectTooFullRemoteReservation &evt)
{
DECLARE_LOCALS;
ps->reject_reservation();
post_event(RemoteReservationRejected());
post_event(RemoteReservationRejectedTooFull());
return discard_event();
}

boost::statechart::result
PeeringState::RepWaitBackfillReserved::react(
const RemoteReservationRejected &evt)
const RemoteReservationRejectedTooFull &evt)
{
DECLARE_LOCALS;
pl->unreserve_recovery_space();
Expand Down Expand Up @@ -4883,7 +4884,7 @@ PeeringState::RepRecovering::react(const BackfillTooFull &)
pl->send_cluster_message(
ps->primary.osd,
new MBackfillReserve(
MBackfillReserve::TOOFULL,
MBackfillReserve::REVOKE_TOOFULL,
spg_t(ps->info.pgid.pgid, ps->primary.shard),
ps->get_osdmap_epoch()),
ps->get_osdmap_epoch());
Expand Down
34 changes: 17 additions & 17 deletions src/osd/PeeringState.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ class PeeringState : public MissingLoc::MappingInfo {
TrivialEvent(NeedUpThru)
TrivialEvent(Backfilled)
TrivialEvent(LocalBackfillReserved)
TrivialEvent(RejectRemoteReservation)
TrivialEvent(RejectTooFullRemoteReservation)
TrivialEvent(RequestBackfill)
TrivialEvent(RemoteRecoveryPreempted)
TrivialEvent(RemoteBackfillPreempted)
Expand Down Expand Up @@ -870,12 +870,12 @@ class PeeringState : public MissingLoc::MappingInfo {
boost::statechart::custom_reaction< Backfilled >,
boost::statechart::custom_reaction< DeferBackfill >,
boost::statechart::custom_reaction< UnfoundBackfill >,
boost::statechart::custom_reaction< RemoteReservationRejected >,
boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
boost::statechart::custom_reaction< RemoteReservationRevokedTooFull>,
boost::statechart::custom_reaction< RemoteReservationRevoked>
> reactions;
explicit Backfilling(my_context ctx);
boost::statechart::result react(const RemoteReservationRejected& evt) {
boost::statechart::result react(const RemoteReservationRejectedTooFull& evt) {
// for compat with old peers
post_event(RemoteReservationRevokedTooFull());
return discard_event();
Expand All @@ -893,7 +893,7 @@ class PeeringState : public MissingLoc::MappingInfo {
struct WaitRemoteBackfillReserved : boost::statechart::state< WaitRemoteBackfillReserved, Active >, NamedState {
typedef boost::mpl::list<
boost::statechart::custom_reaction< RemoteBackfillReserved >,
boost::statechart::custom_reaction< RemoteReservationRejected >,
boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
boost::statechart::custom_reaction< RemoteReservationRevoked >,
boost::statechart::transition< AllBackfillsReserved, Backfilling >
> reactions;
Expand All @@ -902,7 +902,7 @@ class PeeringState : public MissingLoc::MappingInfo {
void retry();
void exit();
boost::statechart::result react(const RemoteBackfillReserved& evt);
boost::statechart::result react(const RemoteReservationRejected& evt);
boost::statechart::result react(const RemoteReservationRejectedTooFull& evt);
boost::statechart::result react(const RemoteReservationRevoked& evt);
};

Expand All @@ -918,12 +918,12 @@ class PeeringState : public MissingLoc::MappingInfo {
typedef boost::mpl::list<
boost::statechart::transition< RequestBackfill, WaitLocalBackfillReserved>,
boost::statechart::custom_reaction< RemoteBackfillReserved >,
boost::statechart::custom_reaction< RemoteReservationRejected >
boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >
> reactions;
explicit NotBackfilling(my_context ctx);
void exit();
boost::statechart::result react(const RemoteBackfillReserved& evt);
boost::statechart::result react(const RemoteReservationRejected& evt);
boost::statechart::result react(const RemoteReservationRejectedTooFull& evt);
};

struct NotRecovering : boost::statechart::state< NotRecovering, Active>, NamedState {
Expand Down Expand Up @@ -1003,7 +1003,7 @@ class PeeringState : public MissingLoc::MappingInfo {
typedef boost::mpl::list<
boost::statechart::transition< RecoveryDone, RepNotRecovering >,
// for compat with old peers
boost::statechart::transition< RemoteReservationRejected, RepNotRecovering >,
boost::statechart::transition< RemoteReservationRejectedTooFull, RepNotRecovering >,
boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >,
boost::statechart::custom_reaction< BackfillTooFull >,
boost::statechart::custom_reaction< RemoteRecoveryPreempted >,
Expand All @@ -1019,29 +1019,29 @@ class PeeringState : public MissingLoc::MappingInfo {
struct RepWaitBackfillReserved : boost::statechart::state< RepWaitBackfillReserved, ReplicaActive >, NamedState {
typedef boost::mpl::list<
boost::statechart::custom_reaction< RemoteBackfillReserved >,
boost::statechart::custom_reaction< RejectRemoteReservation >,
boost::statechart::custom_reaction< RemoteReservationRejected >,
boost::statechart::custom_reaction< RejectTooFullRemoteReservation >,
boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
boost::statechart::custom_reaction< RemoteReservationCanceled >
> reactions;
explicit RepWaitBackfillReserved(my_context ctx);
void exit();
boost::statechart::result react(const RemoteBackfillReserved &evt);
boost::statechart::result react(const RejectRemoteReservation &evt);
boost::statechart::result react(const RemoteReservationRejected &evt);
boost::statechart::result react(const RejectTooFullRemoteReservation &evt);
boost::statechart::result react(const RemoteReservationRejectedTooFull &evt);
boost::statechart::result react(const RemoteReservationCanceled &evt);
};

struct RepWaitRecoveryReserved : boost::statechart::state< RepWaitRecoveryReserved, ReplicaActive >, NamedState {
typedef boost::mpl::list<
boost::statechart::custom_reaction< RemoteRecoveryReserved >,
// for compat with old peers
boost::statechart::custom_reaction< RemoteReservationRejected >,
boost::statechart::custom_reaction< RemoteReservationRejectedTooFull >,
boost::statechart::custom_reaction< RemoteReservationCanceled >
> reactions;
explicit RepWaitRecoveryReserved(my_context ctx);
void exit();
boost::statechart::result react(const RemoteRecoveryReserved &evt);
boost::statechart::result react(const RemoteReservationRejected &evt) {
boost::statechart::result react(const RemoteReservationRejectedTooFull &evt) {
// for compat with old peers
post_event(RemoteReservationCanceled());
return discard_event();
Expand All @@ -1053,8 +1053,8 @@ class PeeringState : public MissingLoc::MappingInfo {
typedef boost::mpl::list<
boost::statechart::custom_reaction< RequestRecoveryPrio >,
boost::statechart::custom_reaction< RequestBackfillPrio >,
boost::statechart::custom_reaction< RejectRemoteReservation >,
boost::statechart::transition< RemoteReservationRejected, RepNotRecovering >,
boost::statechart::custom_reaction< RejectTooFullRemoteReservation >,
boost::statechart::transition< RemoteReservationRejectedTooFull, RepNotRecovering >,
boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >,
boost::statechart::custom_reaction< RemoteRecoveryReserved >,
boost::statechart::custom_reaction< RemoteBackfillReserved >,
Expand All @@ -1071,7 +1071,7 @@ class PeeringState : public MissingLoc::MappingInfo {
// my reservation completion raced with a RELEASE from primary
return discard_event();
}
boost::statechart::result react(const RejectRemoteReservation &evt);
boost::statechart::result react(const RejectTooFullRemoteReservation &evt);
void exit();
};

Expand Down

0 comments on commit 94b8839

Please sign in to comment.