Skip to content

Commit

Permalink
WIP failing test investigation
Browse files Browse the repository at this point in the history
  • Loading branch information
mprimi committed Jan 3, 2024
1 parent 19e5512 commit 8dcb801
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 19 deletions.
17 changes: 14 additions & 3 deletions server/raft_helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ type raftChainStateMachine struct {
hash hash.Hash
blocksApplied uint64
blocksAppliedSinceSnapshot uint64
stopped bool
}

// Block is just a random array of bytes, but contains a little extra metadata to track its source
Expand All @@ -317,10 +318,14 @@ func (sm *raftChainStateMachine) logDebug(format string, args ...any) {
}

func (sm *raftChainStateMachine) server() *Server {
sm.Lock()
defer sm.Unlock()
return sm.s
}

func (sm *raftChainStateMachine) node() RaftNode {
sm.Lock()
defer sm.Unlock()
return sm.n
}

Expand Down Expand Up @@ -364,6 +369,9 @@ func (sm *raftChainStateMachine) leaderChange(isLeader bool) {
sm.logDebug("Leader change")
}
sm.leader = isLeader
if isLeader != sm.node().Leader() {
sm.logDebug("⚠️ Leader state out of sync with underlying node")
}
}

func (sm *raftChainStateMachine) stop() {
Expand All @@ -372,8 +380,10 @@ func (sm *raftChainStateMachine) stop() {
sm.n.Stop()

// Clear state, on restart it will be recovered from snapshot or peers
sm.stopped = true
sm.blocksApplied = 0
sm.hash.Reset()
sm.leader = false
sm.logDebug("Stopped")
}

Expand All @@ -383,6 +393,7 @@ func (sm *raftChainStateMachine) restart() {

sm.logDebug("Restarting")

sm.stopped = false
if sm.n.State() != Closed {
return
}
Expand Down Expand Up @@ -455,12 +466,12 @@ func (sm *raftChainStateMachine) applyBlock(data []byte) {
sm.logDebug("Hash after %d blocks: %X ", sm.blocksApplied, sm.hash.Sum(nil))
}

func (sm *raftChainStateMachine) getCurrentHash() (uint64, string) {
func (sm *raftChainStateMachine) getCurrentHash() (bool, uint64, string) {
sm.Lock()
defer sm.Unlock()

// Return the number of blocks applied and the current running hash
return sm.blocksApplied, fmt.Sprintf("%X", sm.hash.Sum(nil))
// Return running, the number of blocks applied and the current running hash
return !sm.stopped, sm.blocksApplied, fmt.Sprintf("%X", sm.hash.Sum(nil))
}

type chainHashSnapshot struct {
Expand Down
71 changes: 55 additions & 16 deletions server/raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,15 @@ func TestRaftChainOneBlockInLockstep(t *testing.T) {
stateMachine.node().ID(),
)
checkFor(t, timeout, 500*time.Millisecond, func() error {
blocksCount, currentHash := stateMachine.getCurrentHash()
running, blocksCount, currentHash := stateMachine.getCurrentHash()
// All nodes always running
if !running {
return fmt.Errorf(
"node %s is not running",
nodeName,
)
}
// Node is behind
if blocksCount != iteration {
return fmt.Errorf(
"node %s applied %d blocks out of %d expected",
Expand Down Expand Up @@ -413,7 +421,15 @@ func TestRaftChainStopAndCatchUp(t *testing.T) {
stateMachine.node().ID(),
)
checkFor(t, timeout, 500*time.Millisecond, func() error {
blocksCount, currentHash := stateMachine.getCurrentHash()
running, blocksCount, currentHash := stateMachine.getCurrentHash()
// All nodes should be running
if !running {
return fmt.Errorf(
"node %s not running",
nodeName,
)
}
// Node is behind
if blocksCount != expectedBlocks {
return fmt.Errorf(
"node %s applied %d blocks out of %d expected",
Expand Down Expand Up @@ -466,10 +482,10 @@ func FuzzRaftChain(f *testing.F) {
checkConvergenceTimeout = 30 * time.Second
)

//RaftChainOptions.verbose = true
RaftChainOptions.verbose = true

// Cases to run when executed as unit test:
f.Add(100, int64(123456))
//f.Add(100, int64(123456))
f.Add(1000, int64(123456))

// Run in Fuzz mode to repeat maximizing coverage and looking for failing cases
Expand Down Expand Up @@ -560,16 +576,27 @@ func FuzzRaftChain(f *testing.F) {
b := strings.Builder{}
for _, sm := range rg {
csm := sm.(*raftChainStateMachine)
blocksCount, blockHash := csm.getCurrentHash()
b.WriteString(
fmt.Sprintf(
" [%s (%s): %d blocks, hash=%s],",
csm.server().Name(),
csm.node().ID(),
blocksCount,
blockHash,
),
)
running, blocksCount, blockHash := csm.getCurrentHash()
if running {
b.WriteString(
fmt.Sprintf(
" [%s (%s): %d blocks, hash=%s],",
csm.server().Name(),
csm.node().ID(),
blocksCount,
blockHash,
),
)
} else {
b.WriteString(
fmt.Sprintf(
" [%s (%s): STOPPED],",
csm.server().Name(),
csm.node().ID(),
),
)

}
}
return b.String()
}
Expand Down Expand Up @@ -657,10 +684,22 @@ func FuzzRaftChain(f *testing.F) {
checkConvergenceTimeout,
1*time.Second,
func() error {
referenceBlocksCount, referenceHash := rg[0].(*raftChainStateMachine).getCurrentHash()
referenceRunning, referenceBlocksCount, referenceHash := rg[0].(*raftChainStateMachine).getCurrentHash()
if !referenceRunning {
return fmt.Errorf(
"reference node not running",
)
}
for _, n := range rg {
sm := n.(*raftChainStateMachine)
blocksCount, blockHash := sm.getCurrentHash()
running, blocksCount, blockHash := sm.getCurrentHash()
if !running {
return fmt.Errorf(
"node not running: %s (%s)",
sm.server().Name(),
sm.node().ID(),
)
}
// Track the highest block seen
if blocksCount > highestBlocksCount {
t.Logf(
Expand Down

0 comments on commit 8dcb801

Please sign in to comment.