Skip to content

Commit

Permalink
tortoise: remove several bottlenecks during recovery (#5010)
Browse files Browse the repository at this point in the history
related: #3006

- tally votes once per window, or after all ballots were loaded
- load ballots without joining the identities table; that information is already available in tortoise
- set tortoise log level to warn on mainnet; it is unpleasant for info-level logs to be printed every time the node is restarted
  • Loading branch information
dshulyak committed Sep 18, 2023
1 parent 0c8ef5f commit d3de17e
Show file tree
Hide file tree
Showing 10 changed files with 57 additions and 15 deletions.
7 changes: 5 additions & 2 deletions config/mainnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"runtime"
"time"

"go.uber.org/zap/zapcore"

"github.com/spacemeshos/go-spacemesh/activation"
"github.com/spacemeshos/go-spacemesh/api/grpcserver"
"github.com/spacemeshos/go-spacemesh/beacon"
Expand Down Expand Up @@ -36,7 +38,8 @@ func MainnetConfig() Config {
if smeshing.ProvingOpts.Threads < 1 {
smeshing.ProvingOpts.Threads = 1
}

logging := DefaultLoggingConfig()
logging.TrtlLoggerLevel = zapcore.WarnLevel.String()
return Config{
BaseConfig: BaseConfig{
DataDirParent: defaultDataDir,
Expand Down Expand Up @@ -131,7 +134,7 @@ func MainnetConfig() Config {
TIME: timeConfig.DefaultConfig(),
SMESHING: smeshing,
FETCH: fetch.DefaultConfig(),
LOGGING: DefaultLoggingConfig(),
LOGGING: logging,
Sync: syncer.Config{
Interval: time.Minute,
EpochEndFraction: 0.8,
Expand Down
2 changes: 2 additions & 0 deletions node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -619,13 +619,15 @@ func (app *App) initServices(ctx context.Context) error {
app.log.With().Info("tortoise will trace execution")
trtlopts = append(trtlopts, tortoise.WithTracer())
}
start := time.Now()
trtl, err := tortoise.Recover(
app.cachedDB,
app.clock.CurrentLayer(), beaconProtocol, trtlopts...,
)
if err != nil {
return fmt.Errorf("can't recover tortoise state: %w", err)
}
app.log.With().Info("tortoise initialized", log.Duration("duration", time.Since(start)))
app.eg.Go(func() error {
for rst := range beaconProtocol.Results() {
events.EmitBeacon(rst.Epoch, rst.Beacon)
Expand Down
25 changes: 25 additions & 0 deletions sql/ballots/ballots.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,31 @@ func Layer(db sql.Executor, lid types.LayerID) (rst []*types.Ballot, err error)
return rst, err
}

// LayerNoMalicious returns every ballot stored for layer lid, fully decoded,
// without joining the identities table. Because the malfeasance information
// is not queried, the malicious flag is left unset on the returned ballots;
// callers are expected to obtain it from elsewhere.
//
// It returns an error if the query fails or if any stored ballot cannot be
// decoded. On error the returned slice is nil.
func LayerNoMalicious(db sql.Executor, lid types.LayerID) (rst []*types.Ballot, err error) {
	// derr carries a decoding failure out of the row callback, which can only
	// report a bool back to the executor.
	var derr error
	if _, err = db.Exec(`select id, ballot from ballots where layer = ?1;`,
		func(stmt *sql.Statement) {
			stmt.BindInt64(1, int64(lid))
		}, func(stmt *sql.Statement) bool {
			id := types.BallotID{}
			stmt.ColumnBytes(0, id[:])
			var ballot types.Ballot
			// Assign with `=` rather than `:=`: a short variable declaration
			// here would shadow the outer derr and silently drop the error.
			if _, derr = codec.DecodeFrom(stmt.ColumnReader(1), &ballot); derr != nil {
				// NOTE(review): returning false presumably stops row
				// iteration — confirm against sql.Executor's contract.
				return false
			}
			ballot.SetID(id)
			rst = append(rst, &ballot)
			return true
		}); err != nil {
		return nil, fmt.Errorf("selecting %d: %w", lid, err)
	}
	if derr != nil {
		// Wrap derr (not err, which is nil on this path) so the cause is kept.
		return nil, fmt.Errorf("decoding %d: %w", lid, derr)
	}
	return rst, nil
}

// IDsInLayer returns ballots ids in the layer.
func IDsInLayer(db sql.Executor, lid types.LayerID) (rst []types.BallotID, err error) {
if _, err := db.Exec("select id from ballots where layer = ?1;", func(stmt *sql.Statement) {
Expand Down
7 changes: 7 additions & 0 deletions sql/ballots/ballots_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ func TestLayer(t *testing.T) {
for _, ballot := range rst {
require.True(t, ballot.IsMalicious())
}

rst, err = LayerNoMalicious(db, start)
require.NoError(t, err)
require.Len(t, rst, len(ballots))
for _, ballot := range rst {
require.False(t, ballot.IsMalicious())
}
}

func TestAdd(t *testing.T) {
Expand Down
3 changes: 3 additions & 0 deletions tortoise/algorithm.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ func New(opts ...Opt) (*Tortoise, error) {
zap.Uint32("zdist", t.cfg.Zdist),
)
}
if t.cfg.WindowSize == 0 {
t.logger.Panic("tortoise-window-size should not be zero")
}
t.trtl = newTurtle(t.logger, t.cfg)
if t.tracer != nil {
t.tracer.On(&ConfigTrace{
Expand Down
3 changes: 2 additions & 1 deletion tortoise/full.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,10 @@ func (f *full) countBallot(logger *zap.Logger, ballot *ballotInfo) {
continue
}
layer := f.layer(lvote.lid)
height := ballot.reference.height
empty := true
for _, block := range layer.blocks {
if block.height > ballot.reference.height {
if block.height > height {
continue
}
vote := lvote.getVote(block)
Expand Down
2 changes: 1 addition & 1 deletion tortoise/model/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func (c *core) OnMessage(m Messenger, event Message) {
m.Send(MessageBallot{Ballot: ballot})
case MessageLayerEnd:
if ev.LayerID.After(types.GetEffectiveGenesis()) {
tortoise.RecoverLayer(context.Background(), c.tortoise, c.cdb, c.beacons, ev.LayerID, ev.LayerID)
tortoise.RecoverLayer(context.Background(), c.tortoise, c.cdb, c.beacons, ev.LayerID, ev.LayerID, ev.LayerID)
m.Notify(EventVerified{ID: c.id, Verified: c.tortoise.LatestComplete(), Layer: ev.LayerID})
}

Expand Down
17 changes: 9 additions & 8 deletions tortoise/recover.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func Recover(db *datastore.CachedDB, latest types.LayerID, beacon system.BeaconG
return nil, err
}

layer, err := ballots.LatestLayer(db)
last, err := ballots.LatestLayer(db)
if err != nil {
return nil, fmt.Errorf("failed to load latest known layer: %w", err)
}
Expand All @@ -49,15 +49,15 @@ func Recover(db *datastore.CachedDB, latest types.LayerID, beacon system.BeaconG
return nil, fmt.Errorf("failed to load latest epoch: %w", err)
}
epoch++ // recoverEpoch expects target epoch, rather than publish
if layer.GetEpoch() != epoch {
for eid := layer.GetEpoch(); eid <= epoch; eid++ {
if last.GetEpoch() != epoch {
for eid := last.GetEpoch(); eid <= epoch; eid++ {
if err := recoverEpoch(eid, trtl, db, beacon); err != nil {
return nil, err
}
}
}
for lid := types.GetEffectiveGenesis().Add(1); !lid.After(layer); lid = lid.Add(1) {
if err := RecoverLayer(context.Background(), trtl, db, beacon, lid, min(layer, latest)); err != nil {
for lid := types.GetEffectiveGenesis().Add(1); !lid.After(last); lid = lid.Add(1) {
if err := RecoverLayer(context.Background(), trtl, db, beacon, lid, last, min(last, latest)); err != nil {
return nil, fmt.Errorf("failed to load tortoise state at layer %d: %w", lid, err)
}
}
Expand All @@ -78,7 +78,7 @@ func recoverEpoch(epoch types.EpochID, trtl *Tortoise, db *datastore.CachedDB, b
return nil
}

func RecoverLayer(ctx context.Context, trtl *Tortoise, db *datastore.CachedDB, beacon system.BeaconGetter, lid, current types.LayerID) error {
func RecoverLayer(ctx context.Context, trtl *Tortoise, db *datastore.CachedDB, beacon system.BeaconGetter, lid, last, current types.LayerID) error {
if lid.FirstInEpoch() {
if err := recoverEpoch(lid.GetEpoch(), trtl, db, beacon); err != nil {
return err
Expand Down Expand Up @@ -106,7 +106,8 @@ func RecoverLayer(ctx context.Context, trtl *Tortoise, db *datastore.CachedDB, b
trtl.OnHareOutput(lid, hare)
}
}
ballotsrst, err := ballots.Layer(db, lid)
// NOTE(dshulyak) we loaded information about malicious identities earlier.
ballotsrst, err := ballots.LayerNoMalicious(db, lid)
if err != nil {
return err
}
Expand All @@ -127,7 +128,7 @@ func RecoverLayer(ctx context.Context, trtl *Tortoise, db *datastore.CachedDB, b
if err == nil {
trtl.OnWeakCoin(lid, coin)
}
if lid <= current {
if lid <= current && (lid%types.LayerID(trtl.cfg.WindowSize) == 0 || lid == last) {
trtl.TallyVotes(ctx, lid)

opinion, err := layers.GetAggregatedHash(db, lid-1)
Expand Down
2 changes: 1 addition & 1 deletion tortoise/recover_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func (a *recoveryAdapter) TallyVotes(ctx context.Context, current types.LayerID)
a.prev = genesis
}
for lid := a.prev; lid <= current; lid++ {
require.NoError(a, RecoverLayer(ctx, a.Tortoise, a.db, a.beacon, lid, current))
require.NoError(a, RecoverLayer(ctx, a.Tortoise, a.db, a.beacon, lid, current, current))
a.prev = lid
}
}
Expand Down
4 changes: 2 additions & 2 deletions tortoise/tortoise_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
"github.com/spacemeshos/go-spacemesh/sql/ballots"
"github.com/spacemeshos/go-spacemesh/sql/blocks"
"github.com/spacemeshos/go-spacemesh/sql/certificates"
"github.com/spacemeshos/go-spacemesh/sql/identities"
"github.com/spacemeshos/go-spacemesh/sql/layers"
"github.com/spacemeshos/go-spacemesh/tortoise/opinionhash"
"github.com/spacemeshos/go-spacemesh/tortoise/sim"
Expand Down Expand Up @@ -1094,6 +1093,7 @@ func TestBaseBallotPrioritization(t *testing.T) {
sim.WithSequence(5),
},
expected: genesis.Add(5),
window: 1,
},
{
desc: "BadBlocksIgnored",
Expand Down Expand Up @@ -1885,7 +1885,7 @@ func TestMaliciousBallotsAreIgnored(t *testing.T) {
blts, err := ballots.Layer(s.GetState(0).DB, last)
require.NoError(t, err)
for _, ballot := range blts {
require.NoError(t, identities.SetMalicious(s.GetState(0).DB, ballot.SmesherID, []byte("proof"), time.Now()))
tortoise.OnMalfeasance(ballot.SmesherID)
}

tortoise.TallyVotes(ctx, s.Next())
Expand Down

0 comments on commit d3de17e

Please sign in to comment.