Skip to content

Commit

Permalink
SOLR-3995: Recovery may never finish on SolrCore shutdown if the last…
Browse files Browse the repository at this point in the history
… reference to a SolrCore is closed by the recovery process

SOLR-3994:Create more extensive tests around unloading cores.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/branch_4x@1402396 13f79535-47bb-0310-9956-ffa450edef68
(cherry picked from commit 65621e2)

Conflicts:

	solr/CHANGES.txt
  • Loading branch information
markrmiller authored and hossman committed Oct 30, 2012
1 parent a4b2f34 commit 3f2468f
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 29 deletions.
10 changes: 8 additions & 2 deletions solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
Expand Up @@ -17,6 +17,7 @@
import org.apache.solr.update.UpdateLog;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -38,7 +39,7 @@
*/

public abstract class ElectionContext {

private static Logger log = LoggerFactory.getLogger(ElectionContext.class);
final String electionPath;
final ZkNodeProps leaderProps;
final String id;
Expand All @@ -58,7 +59,12 @@ public ElectionContext(final String shardZkNodeName,
public void close() {}

public void cancelElection() throws InterruptedException, KeeperException {
zkClient.delete(leaderSeqPath, -1, true);
try {
zkClient.delete(leaderSeqPath, -1, true);
} catch (NoNodeException e) {
// fine
log.warn("cancelElection did not find election node to remove");
}
}

abstract void runLeaderProcess(boolean weAreReplacement) throws KeeperException, InterruptedException, IOException;
Expand Down
4 changes: 4 additions & 0 deletions solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
Expand Up @@ -84,6 +84,10 @@ private void checkIfIamLeader(final int seq, final ElectionContext context, bool

sortSeqs(seqs);
List<Integer> intSeqs = getSeqs(seqs);
if (intSeqs.size() == 0) {
log.warn("Our node is no longer in line to be leader");
return;
}
if (seq <= intSeqs.get(0)) {
// first we delete the node advertising the old leader in case the ephem is still there
try {
Expand Down
Expand Up @@ -311,7 +311,7 @@ public void doRecovery(SolrCore core) throws KeeperException, InterruptedExcepti
}
}

while (!successfulRecovery && !isInterrupted()) { // don't use interruption or it will close channels though
while (!successfulRecovery && !isInterrupted() && !isClosed()) { // don't use interruption or it will close channels though
try {
CloudDescriptor cloudDesc = core.getCoreDescriptor()
.getCloudDescriptor();
Expand Down
4 changes: 3 additions & 1 deletion solr/core/src/java/org/apache/solr/core/CoreContainer.java
Expand Up @@ -1083,7 +1083,9 @@ public SolrCore remove( String name ) {

synchronized(cores) {
SolrCore core = cores.remove( name );
coreToOrigName.remove(core);
if (core != null) {
coreToOrigName.remove(core);
}
return core;
}

Expand Down
Expand Up @@ -668,7 +668,12 @@ public void postClose(SolrCore core) {
});
}
} finally {
if (core != null) core.close();
if (core != null) {
if (coreContainer.getZkController() != null) {
core.getSolrCoreState().cancelRecovery();
}
core.close();
}
}
return coreContainer.isPersistent();

Expand Down
Expand Up @@ -45,7 +45,7 @@ public final class DefaultSolrCoreState extends SolrCoreState implements Recover

private volatile boolean recoveryRunning;
private RecoveryStrategy recoveryStrat;
private boolean closed = false;
private volatile boolean closed = false;

private RefCounted<IndexWriter> refCntWriter;

Expand Down
Expand Up @@ -67,7 +67,6 @@
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionParams.CollectionAction;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.NamedList;
Expand Down Expand Up @@ -322,19 +321,19 @@ public void doTest() throws Exception {

// would be better if these where all separate tests - but much, much
// slower
// doOptimisticLockingAndUpdating();
// testMultipleCollections();
// testANewCollectionInOneInstance();
// testSearchByCollectionName();
// testANewCollectionInOneInstanceWithManualShardAssignement();
// testNumberOfCommitsWithCommitAfterAdd();
//
// testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-explicit");
// testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-implicit");
//
// testCollectionsAPI();
doOptimisticLockingAndUpdating();
testMultipleCollections();
testANewCollectionInOneInstance();
testSearchByCollectionName();
testANewCollectionInOneInstanceWithManualShardAssignement();
testNumberOfCommitsWithCommitAfterAdd();

testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-explicit");
testUpdateProcessorsRunOnlyOnce("distrib-dup-test-chain-implicit");

testCollectionsAPI();
testCoreUnloadAndLeaders();

testUnloadLotsOfCores();
// Thread.sleep(10000000000L);
if (DEBUG) {
super.printLayout();
Expand Down Expand Up @@ -412,8 +411,6 @@ private void testCoreUnloadAndLeaders() throws Exception {
createCmd.setDataDir(core3dataDir);
server.request(createCmd);

Thread.sleep(1000);

waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);

// so that we start with some versions when we reload...
Expand Down Expand Up @@ -474,8 +471,6 @@ private void testCoreUnloadAndLeaders() throws Exception {
createCmd.setDataDir(core4dataDir);
server.request(createCmd);

Thread.sleep(1000);

waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);

// unload the leader again
Expand Down Expand Up @@ -509,9 +504,7 @@ private void testCoreUnloadAndLeaders() throws Exception {
createCmd.setCollection("unloadcollection");
createCmd.setDataDir(core1DataDir);
server.request(createCmd);

Thread.sleep(1000);


waitForRecoveriesToFinish("unloadcollection", zkStateReader, false);


Expand All @@ -538,7 +531,65 @@ private void testCoreUnloadAndLeaders() throws Exception {
assertEquals(found3, found4);

}


private void testUnloadLotsOfCores() throws Exception {
SolrServer client = clients.get(2);
String url3 = getBaseUrl(client);
final HttpSolrServer server = new HttpSolrServer(url3);

ThreadPoolExecutor executor = new ThreadPoolExecutor(0, Integer.MAX_VALUE,
5, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
new DefaultSolrThreadFactory("testExecutor"));
int cnt = atLeast(6);
for (int i = 0; i < cnt; i++) {
final int freezeI = i;
executor.execute(new Runnable() {

@Override
public void run() {
Create createCmd = new Create();
createCmd.setCoreName("multiunload" + freezeI);
createCmd.setCollection("multiunload");
String core3dataDir = dataDir.getAbsolutePath() + File.separator
+ System.currentTimeMillis() + "unloadcollection" + "_3n" + freezeI;
createCmd.setDataDir(core3dataDir);
try {
server.request(createCmd);
} catch (SolrServerException e) {
throw new RuntimeException(e);
} catch (IOException e) {
throw new RuntimeException(e);
}
}

});
}
executor.shutdown();
executor.awaitTermination(120, TimeUnit.SECONDS);
executor = new ThreadPoolExecutor(0, Integer.MAX_VALUE, 5,
TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
new DefaultSolrThreadFactory("testExecutor"));
for (int j = 0; j < cnt; j++) {
final int freezeJ = j;
executor.execute(new Runnable() {
@Override
public void run() {
Unload unloadCmd = new Unload(true);
unloadCmd.setCoreName("multiunload" + freezeJ);
try {
server.request(unloadCmd);
} catch (SolrServerException e) {
throw new RuntimeException(e);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
Thread.sleep(random().nextInt(50));
}
executor.shutdown();
executor.awaitTermination(120, TimeUnit.SECONDS);
}

private String getBaseUrl(SolrServer client) {
String url2 = ((HttpSolrServer) client).getBaseURL()
Expand Down Expand Up @@ -794,7 +845,7 @@ private void waitForNon403or404or503(HttpSolrServer collectionClient)
}
Thread.sleep(50);
}
printLayout();

fail("Could not find the new collection - " + exp.code() + " : " + collectionClient.getBaseURL());
}

Expand Down
Expand Up @@ -212,7 +212,6 @@ public NamedList<Object> request(SolrRequest request) throws SolrServerException
if ((sendToLeaders && leaderUrlList == null) || (!sendToLeaders
&& urlList == null)
|| clusterState.hashCode() != this.lastClusterStateHashCode) {
System.out.println("build a new map for " + collection);
// build a map of unique nodes
// TODO: allow filtering by group, role, etc
Map<String,ZkNodeProps> nodes = new HashMap<String,ZkNodeProps>();
Expand Down
Expand Up @@ -117,6 +117,7 @@ public ZkNodeProps getLeader(String collection, String shard) {
*/
public Replica getShardProps(final String collection, final String coreNodeName) {
Map<String, Slice> slices = getSlices(collection);
if (slices == null) return null;
for(Slice slice: slices.values()) {
if(slice.getReplicasMap().get(coreNodeName)!=null) {
return slice.getReplicasMap().get(coreNodeName);
Expand Down

0 comments on commit 3f2468f

Please sign in to comment.