-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
RaftReplicator.java
183 lines (170 loc) · 7.49 KB
/
RaftReplicator.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
/*
* Copyright (c) 2002-2018 "Neo4j,"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j Enterprise Edition. The included source
* code can be redistributed and/or modified under the terms of the
* GNU AFFERO GENERAL PUBLIC LICENSE Version 3
* (http://www.fsf.org/licensing/licenses/agpl-3.0.html) with the
* Commons Clause, as found in the associated LICENSE.txt file.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* Neo4j object code can be licensed independently from the source
* under separate terms from the AGPL. Inquiries can be directed to:
* licensing@neo4j.com
*
* More information is also available at:
* https://neo4j.com/licensing/
*/
package org.neo4j.causalclustering.core.replication;
import java.util.concurrent.Future;
import java.util.function.BiConsumer;
import org.neo4j.causalclustering.core.consensus.LeaderInfo;
import org.neo4j.causalclustering.core.consensus.LeaderListener;
import org.neo4j.causalclustering.core.consensus.LeaderLocator;
import org.neo4j.causalclustering.core.consensus.NoLeaderFoundException;
import org.neo4j.causalclustering.core.consensus.RaftMessages;
import org.neo4j.causalclustering.core.replication.monitoring.ReplicationMonitor;
import org.neo4j.causalclustering.core.replication.session.LocalSessionPool;
import org.neo4j.causalclustering.core.replication.session.OperationContext;
import org.neo4j.causalclustering.helper.TimeoutStrategy;
import org.neo4j.causalclustering.identity.MemberId;
import org.neo4j.causalclustering.messaging.Outbound;
import org.neo4j.kernel.availability.AvailabilityGuard;
import org.neo4j.kernel.availability.UnavailableException;
import org.neo4j.kernel.monitoring.Monitors;
import org.neo4j.logging.Log;
import org.neo4j.logging.LogProvider;
/**
 * A replicator implementation suitable in a RAFT context. Will handle resending due to timeouts and leader switches.
 * <p>
 * The instance registers itself as a {@link LeaderListener} with the supplied {@link LeaderLocator} and forwards
 * every leader switch to the {@link ProgressTracker} as a replication event.
 */
public class RaftReplicator implements Replicator, LeaderListener
{
    private final MemberId me;
    private final Outbound<MemberId,RaftMessages.RaftMessage> outbound;
    private final ProgressTracker progressTracker;
    private final LocalSessionPool sessionPool;
    private final TimeoutStrategy progressTimeoutStrategy;
    private final AvailabilityGuard availabilityGuard;
    private final LeaderLocator leaderLocator;
    private final TimeoutStrategy leaderTimeoutStrategy;
    private final Log log;
    private final ReplicationMonitor replicationMonitor;
    private final long availabilityTimeoutMillis;

    /**
     * Creates the replicator and registers it as a leader listener.
     *
     * @param leaderLocator used to look up the current leader and to register this instance for leader switches
     * @param me identity used as the sender of outgoing new-entry requests
     * @param outbound channel used to send new-entry requests to the leader
     * @param sessionPool source of the session (global session + local operation id) attached to each operation
     * @param progressTracker tracks replication progress of each started operation
     * @param progressTimeoutStrategy produces the timeout used when awaiting replication of a sent operation
     * @param leaderTimeoutStrategy produces the back-off used while no leader can be found
     * @param availabilityTimeoutMillis how long, in milliseconds, to wait for the availability guard on each attempt
     * @param availabilityGuard awaited before every replication attempt
     * @param logProvider provider of the log for this class
     * @param monitors used to create the {@link ReplicationMonitor}
     */
    public RaftReplicator( LeaderLocator leaderLocator, MemberId me, Outbound<MemberId,RaftMessages.RaftMessage> outbound, LocalSessionPool sessionPool,
            ProgressTracker progressTracker, TimeoutStrategy progressTimeoutStrategy, TimeoutStrategy leaderTimeoutStrategy, long availabilityTimeoutMillis,
            AvailabilityGuard availabilityGuard, LogProvider logProvider, Monitors monitors )
    {
        this.me = me;
        this.outbound = outbound;
        this.progressTracker = progressTracker;
        this.sessionPool = sessionPool;
        this.progressTimeoutStrategy = progressTimeoutStrategy;
        this.leaderTimeoutStrategy = leaderTimeoutStrategy;
        this.availabilityTimeoutMillis = availabilityTimeoutMillis;
        this.availabilityGuard = availabilityGuard;
        this.leaderLocator = leaderLocator;
        this.log = logProvider.getLog( getClass() );
        this.replicationMonitor = monitors.newMonitor( ReplicationMonitor.class );
        // Register last so that a listener callback can never observe a partially initialised instance.
        leaderLocator.registerListener( this );
    }

    /**
     * Replicates the given content, starting with whichever member is the leader at the time of the call.
     *
     * @param command the content to replicate
     * @param trackResult whether the session should be held until the returned future completes
     * @return the future result of the replicated operation
     * @throws ReplicationFailureException if no leader is available when the call is made, or if replication fails
     */
    @Override
    public Future<Object> replicate( ReplicatedContent command, boolean trackResult ) throws ReplicationFailureException
    {
        MemberId originalLeader;
        try
        {
            originalLeader = leaderLocator.getLeader();
        }
        catch ( NoLeaderFoundException e )
        {
            throw new ReplicationFailureException( "Replication aborted since no leader was available", e );
        }
        return replicate0( command, trackResult, originalLeader );
    }

    /**
     * Sends the operation to the leader and retries until the progress tracker reports it as replicated.
     * <p>
     * Each attempt first waits for the database to be available, sends a new-entry request to the current leader
     * and awaits replication for the current progress timeout. If the operation is still not replicated the
     * progress timeout is incremented and the leader is looked up again; if no leader can be found the thread sleeps
     * for the current leader timeout, increments it and retries with the previously known leader.
     * <p>
     * NOTE(review): on failure paths the session is not handed back to the pool, and for non-interrupt failures
     * the progress tracking is not aborted. This mirrors the pre-existing behaviour — confirm it is intentional.
     *
     * @param command the content to replicate
     * @param trackResult when {@code true} the session is released once the result future completes, otherwise it is
     * released as soon as replication has been observed
     * @param leader the member to send the first attempt to
     * @return the future result of the replicated operation
     * @throws ReplicationFailureException if the database is unavailable or the thread is interrupted while replicating;
     * in the latter case the thread's interrupt status is restored before throwing
     */
    private Future<Object> replicate0( ReplicatedContent command, boolean trackResult, MemberId leader ) throws ReplicationFailureException
    {
        replicationMonitor.startReplication();
        try
        {
            OperationContext session = sessionPool.acquireSession();

            DistributedOperation operation = new DistributedOperation( command, session.globalSession(), session.localOperationId() );
            Progress progress = progressTracker.start( operation );

            TimeoutStrategy.Timeout progressTimeout = progressTimeoutStrategy.newTimeout();
            TimeoutStrategy.Timeout leaderTimeout = leaderTimeoutStrategy.newTimeout();
            int attempts = 0;
            try
            {
                while ( true )
                {
                    attempts++;
                    if ( attempts > 1 )
                    {
                        log.info( "Retrying replication. Current attempt: %d Content: %s", attempts, command );
                    }
                    replicationMonitor.replicationAttempt();
                    assertDatabaseAvailable();
                    try
                    {
                        // blocking at least until the send has succeeded or failed before retrying
                        outbound.send( leader, new RaftMessages.NewEntry.Request( me, operation ), true );
                        progress.awaitReplication( progressTimeout.getMillis() );
                        if ( progress.isReplicated() )
                        {
                            break;
                        }
                        // Not replicated within the timeout: back off and re-resolve the leader for the next attempt.
                        progressTimeout.increment();
                        leader = leaderLocator.getLeader();
                    }
                    catch ( NoLeaderFoundException e )
                    {
                        log.debug( "Could not replicate operation " + operation + " because no leader was found. Retrying.", e );
                        Thread.sleep( leaderTimeout.getMillis() );
                        leaderTimeout.increment();
                    }
                }
            }
            catch ( InterruptedException e )
            {
                progressTracker.abort( operation );
                // Restore the interrupt status so code further up the stack can still observe the interruption.
                Thread.currentThread().interrupt();
                throw new ReplicationFailureException( "Interrupted while replicating", e );
            }

            BiConsumer<Object,Throwable> cleanup = ( ignored1, ignored2 ) -> sessionPool.releaseSession( session );

            if ( trackResult )
            {
                // Keep the session until the result is available.
                progress.futureResult().whenComplete( cleanup );
            }
            else
            {
                cleanup.accept( null, null );
            }
            replicationMonitor.successfulReplication();
            return progress.futureResult();
        }
        catch ( Throwable t )
        {
            // Record the failure for monitoring, then propagate it unchanged.
            replicationMonitor.failedReplication( t );
            throw t;
        }
    }

    /**
     * Notifies the progress tracker of a replication event whenever the leader changes.
     * Presumably this wakes threads waiting in {@code awaitReplication} so they retry promptly — verify against
     * the {@code ProgressTracker} implementation.
     *
     * @param leaderInfo information about the new leader; not used here
     */
    @Override
    public void onLeaderSwitch( LeaderInfo leaderInfo )
    {
        progressTracker.triggerReplicationEvent();
    }

    /**
     * Waits up to {@code availabilityTimeoutMillis} for the availability guard.
     *
     * @throws ReplicationFailureException wrapping the {@link UnavailableException} raised by the guard
     */
    private void assertDatabaseAvailable() throws ReplicationFailureException
    {
        try
        {
            availabilityGuard.await( availabilityTimeoutMillis );
        }
        catch ( UnavailableException e )
        {
            throw new ReplicationFailureException( "Database is not available, transaction cannot be replicated.", e );
        }
    }
}