Skip to content

Commit

Permalink
add support for Lucene indexes configuration: directory (mmap, fs), a…
Browse files Browse the repository at this point in the history
…nalyzers, stopwords, size of buffers
  • Loading branch information
robfrank committed Mar 7, 2016
1 parent dbec85b commit 131a1a4
Show file tree
Hide file tree
Showing 21 changed files with 797 additions and 705 deletions.
Expand Up @@ -15,6 +15,9 @@
*/
package com.orientechnologies.orient.server.distributed;

import java.io.File;
import java.io.IOException;

import com.orientechnologies.common.io.OFileUtils;
import com.orientechnologies.common.util.OCallable;
import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx;
Expand All @@ -23,9 +26,6 @@
import com.tinkerpop.blueprints.impls.orient.OrientBaseGraph;
import com.tinkerpop.blueprints.impls.orient.OrientGraphFactory;

import java.io.File;
import java.io.IOException;

/**
* Running server instance.
*
Expand Down
Expand Up @@ -17,18 +17,12 @@
package com.orientechnologies.lucene;

import com.orientechnologies.common.log.OLogManager;
import com.orientechnologies.lucene.builder.ODocBuilder;
import com.orientechnologies.lucene.builder.OQueryBuilderImpl;
import com.orientechnologies.lucene.engine.OLuceneFullTextExpIndexEngine;
import com.orientechnologies.lucene.engine.OLuceneIndexEngineDelegate;
import com.orientechnologies.lucene.engine.OLuceneStorage;
import com.orientechnologies.lucene.index.OLuceneFullTextExpIndex;
import com.orientechnologies.lucene.index.OLuceneFullTextIndex;
import com.orientechnologies.orient.core.Orient;
import com.orientechnologies.orient.core.db.ODatabaseDocumentInternal;
import com.orientechnologies.orient.core.db.ODatabaseInternal;
import com.orientechnologies.orient.core.db.ODatabaseLifecycleListener;
import com.orientechnologies.orient.core.db.ODatabaseRecordThreadLocal;
import com.orientechnologies.orient.core.exception.OConfigurationException;
import com.orientechnologies.orient.core.index.OIndex;
import com.orientechnologies.orient.core.index.OIndexEngine;
Expand All @@ -41,47 +35,37 @@
import org.apache.lucene.analysis.standard.StandardAnalyzer;

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class OLuceneIndexFactory implements OIndexFactory, ODatabaseLifecycleListener {

public static final String LUCENE_ALGORITHM = "LUCENE";
public static final String LUCENEEXP_ALGORITHM = "LUCENEEXP";
public static final String LUCENE_ALGORITHM = "LUCENE";

private static final Set<String> TYPES;
private static final Set<String> ALGORITHMS;

static {
final Set<String> types = new HashSet<String>();
types.add(OClass.INDEX_TYPE.FULLTEXT.toString());
types.add(OClass.INDEX_TYPE.FULLTEXTEXP.toString());
TYPES = Collections.unmodifiableSet(types);
}

static {
final Set<String> algorithms = new HashSet<String>();
algorithms.add(LUCENE_ALGORITHM);
algorithms.add(LUCENEEXP_ALGORITHM);
ALGORITHMS = Collections.unmodifiableSet(algorithms);
}

private final Map<String, OIndexInternal> db2luceneindexes;
private final Map<ODatabaseDocumentInternal, OLuceneStorage> db2luceneEngine;

public OLuceneIndexFactory() {
this(false);
}

public OLuceneIndexFactory(boolean manual) {
if (!manual)
Orient.instance()
.addDbLifecycleListener(this);
Orient.instance().addDbLifecycleListener(this);

db2luceneindexes = new HashMap<String, OIndexInternal>();
db2luceneEngine = new HashMap<ODatabaseDocumentInternal, OLuceneStorage>();
}

@Override
Expand All @@ -101,52 +85,24 @@ public Set<String> getAlgorithms() {

@Override
public OIndexInternal<?> createIndex(String name, ODatabaseDocumentInternal database, String indexType, String algorithm,
String valueContainerAlgorithm, ODocument metadata, int version)
throws OConfigurationException {
String valueContainerAlgorithm, ODocument metadata, int version) throws OConfigurationException {

OAbstractPaginatedStorage storage = (OAbstractPaginatedStorage) database.getStorage()
.getUnderlying();
OAbstractPaginatedStorage storage = (OAbstractPaginatedStorage) database.getStorage().getUnderlying();

if (metadata == null)
metadata = new ODocument().field("analyzer", StandardAnalyzer.class.getName());

if (OClass.INDEX_TYPE.FULLTEXT.toString().equals(indexType)) {
return new OLuceneFullTextIndex(name, indexType, LUCENE_ALGORITHM, version, storage, valueContainerAlgorithm, metadata);
} else if (OClass.INDEX_TYPE.FULLTEXTEXP.toString()
.equals(indexType)) {

OLogManager.instance()
.info(this, "create index - database:: %s , indexName:: %s , algo:: %s , valuecontalgo:: %s", database.getName(),
name,
algorithm, valueContainerAlgorithm);
if (!db2luceneindexes.containsKey(database.getName()))
db2luceneindexes.put(name, new OLuceneFullTextExpIndex(name, indexType, LUCENEEXP_ALGORITHM, version, storage,
valueContainerAlgorithm, metadata));

return new OLuceneFullTextExpIndex(name, indexType, LUCENEEXP_ALGORITHM, version, storage, valueContainerAlgorithm, metadata);

}

throw new OConfigurationException("Unsupported type : " + algorithm);
}

@Override
public OIndexEngine createIndexEngine(String algorithm, String name, Boolean durableInNonTxMode, OStorage storage, int version,
Map<String, String> engineProperties) {

if (LUCENEEXP_ALGORITHM.equalsIgnoreCase(algorithm)) {
final ODatabaseDocumentInternal database = ODatabaseRecordThreadLocal.INSTANCE.getIfDefined();

OLogManager.instance()
.info(this, "CREATE ENGINE database:: %s , name:: %s , algoritmh:: %s", database.getName(), name, algorithm);
if (!db2luceneEngine.containsKey(database)) {
OLogManager.instance()
.info(this, "REGISTERING name:: %s , algoritmh:: %s , engProps:: %s", name, algorithm, engineProperties);
Map<String, String> engineProperties) {

db2luceneEngine.put(database, new OLuceneStorage(name, new ODocBuilder(), new OQueryBuilderImpl()));

}
return new OLuceneFullTextExpIndexEngine(name, db2luceneEngine.get(database), new ODocBuilder(), new OQueryBuilderImpl());
}
return new OLuceneIndexEngineDelegate(name, durableInNonTxMode, storage, version);

}
Expand Down Expand Up @@ -175,28 +131,19 @@ public void onClose(ODatabaseInternal iDatabase) {

@Override
public void onDrop(final ODatabaseInternal iDatabase) {
OLogManager.instance().info(this, "---->>> onDrop");
try {
OLogManager.instance().info(this, "Dropping Lucene indexes...");
OLogManager.instance().debug(this, "Dropping Lucene indexes...");
for (OIndex idx : iDatabase.getMetadata().getIndexManager().getIndexes()) {

if (idx.getInternal() instanceof OLuceneFullTextExpIndex
|| idx.getInternal() instanceof OLuceneFullTextIndex) {
if (idx.getInternal() instanceof OLuceneFullTextIndex) {

OLogManager.instance().debug(this, "- index '%s'", idx.getName());
idx.delete();
}
if (idx.getInternal() instanceof OLuceneFullTextExpIndex) {

OLuceneStorage luceneStorage = this.db2luceneEngine.get(iDatabase);

luceneStorage.delete(iDatabase);
}
}

} catch (Exception e) {
OLogManager.instance()
.warn(this, "Error on dropping Lucene indexes", e);
OLogManager.instance().warn(this, "Error on dropping Lucene indexes", e);
}
}

Expand Down
Expand Up @@ -7,25 +7,22 @@
import com.orientechnologies.orient.core.record.impl.ODocument;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;

import java.lang.reflect.Constructor;
import java.util.Collection;

/**
* Created by frank on 30/10/2015.
*/
public class OLuceneAnalyzerFactory {

/**
 * Identifies which analyzer configuration is being resolved: the one used while indexing
 * or the one used while querying.
 */
public enum AnalyzerKind {
  INDEX, QUERY;

  /**
   * Returns the constant name in lower case ("index" or "query").
   * <p>
   * The result is used to build metadata field names, so the conversion is done with
   * {@link java.util.Locale#ROOT}: with the default locale the outcome depends on the JVM
   * settings (e.g. under a Turkish locale "INDEX" lower-cases to a dotless-i string).
   *
   * @return the locale-independent lower-case name of this constant
   */
  @Override
  public String toString() {
    return name().toLowerCase(java.util.Locale.ROOT);
  }
}

public Analyzer createAnalyzer(OIndexDefinition index, AnalyzerKind kind, ODocument metadata) {
String defaultAnalyzerFQN = metadata.field("analyzer");
String defaultAnalyzerFQN = metadata.field("default");

String prefix = "";
if (metadata.containsField("prefix_with_class_name") && metadata.<Boolean>field("prefix_with_class_name"))
prefix = index.getClassName() + ".";

//preset default analyzer for all fields
OLucenePerFieldAnalyzerWrapper analyzer;
Expand All @@ -35,20 +32,26 @@ public Analyzer createAnalyzer(OIndexDefinition index, AnalyzerKind kind, ODocum
analyzer = new OLucenePerFieldAnalyzerWrapper(buildAnalyzer(defaultAnalyzerFQN));
}

//default analyzer for indexing
String indexAnalyzerFQN = metadata.field(kind + "_analyzer");
if (indexAnalyzerFQN != null) {
//default analyzer for requested kind
String specializedAnalyzerFQN = metadata.field(kind.toString());
if (specializedAnalyzerFQN != null) {
for (String field : index.getFields()) {
analyzer.add(field, buildAnalyzer(indexAnalyzerFQN));
analyzer.add(prefix + field, buildAnalyzer(specializedAnalyzerFQN));
}
}

//specialized for each field
for (String field : index.getFields()) {
for (String meta : metadata.fieldNames()) {
if (meta.startsWith(field) && meta.contains(kind + "_analyzer")) {
analyzer.add(field, buildAnalyzer(metadata.<String>field(meta)));
}

String analyzerName = field + "_" + kind.toString();

String analyzerStopwords = analyzerName + "_stopwords";

if (metadata.containsField(analyzerName) && metadata.containsField(analyzerStopwords)) {
Collection<String> stopwords = metadata.field(analyzerStopwords);
analyzer.add(prefix + field, buildAnalyzer(metadata.<String>field(analyzerName), stopwords));
} else if (metadata.containsField(analyzerName)) {
analyzer.add(prefix + field, buildAnalyzer(metadata.<String>field(analyzerName)));
}
}

Expand Down Expand Up @@ -77,10 +80,37 @@ private Analyzer buildAnalyzer(String analyzerFQN) {
}

} catch (Exception e) {
OLogManager.instance()
.error(this, "Error on getting analyzer for Lucene index", e);
OLogManager.instance().error(this, "Error on getting analyzer for Lucene index", e);
}
return new StandardAnalyzer();
}

/**
 * Reflectively instantiates the analyzer named by {@code analyzerFQN}, handing it the given
 * stopwords through its {@code CharArraySet}-argument constructor.
 *
 * @param analyzerFQN fully qualified class name of the analyzer to load
 * @param stopwords   words wrapped into a {@code CharArraySet} (built with the flag {@code true};
 *                    presumably "ignore case" - confirm against the Lucene API)
 * @return the analyzer instance, or a {@link StandardAnalyzer} when instantiation fails for a
 *         reason other than a missing class or a missing constructor (the failure is logged)
 * @throws OIndexException if the class cannot be found or has no constructor taking a
 *                         {@code CharArraySet}
 */
private Analyzer buildAnalyzer(String analyzerFQN, Collection<String> stopwords) {
  try {
    final Class<?> analyzerType = Class.forName(analyzerFQN);
    final Constructor<?> stopwordsCtor = analyzerType.getDeclaredConstructor(CharArraySet.class);

    final CharArraySet stopwordSet = new CharArraySet(stopwords, true);
    return (Analyzer) stopwordsCtor.newInstance(stopwordSet);
  } catch (NoSuchMethodException e) {
    throw OException.wrapException(new OIndexException("Couldn't instantiate analyzer: public constructor not found"), e);
  } catch (ClassNotFoundException e) {
    throw OException.wrapException(new OIndexException("Analyzer: " + analyzerFQN + " not found"), e);
  } catch (Exception e) {
    // Best effort: any other failure is logged and the standard analyzer is used instead.
    OLogManager.instance().error(this, "Error on getting analyzer for Lucene index", e);
    return new StandardAnalyzer();
  }
}

/**
 * Identifies which analyzer configuration is being resolved: the one used while indexing
 * or the one used while querying.
 */
public enum AnalyzerKind {
  INDEX, QUERY;

  /**
   * Returns the constant name in lower case ("index" or "query").
   * <p>
   * The result is used to build metadata field names, so the conversion is done with
   * {@link java.util.Locale#ROOT}: with the default locale the outcome depends on the JVM
   * settings (e.g. under a Turkish locale "INDEX" lower-cases to a dotless-i string).
   *
   * @return the locale-independent lower-case name of this constant
   */
  @Override
  public String toString() {
    return name().toLowerCase(java.util.Locale.ROOT);
  }
}

}
Expand Up @@ -5,6 +5,7 @@
import com.orientechnologies.orient.core.index.OIndexDefinition;
import com.orientechnologies.orient.core.metadata.schema.OClass;
import com.orientechnologies.orient.core.metadata.schema.OProperty;
import com.orientechnologies.orient.core.metadata.schema.OSchema;
import com.orientechnologies.orient.core.record.impl.ODocument;

import java.util.HashMap;
Expand All @@ -20,14 +21,15 @@ public class OLuceneClassIndexContext {
protected final Map<String, Boolean> fieldsToStore = new HashMap<String, Boolean>();
protected final OClass indexClass;

public OLuceneClassIndexContext(OIndexDefinition definition, String name, boolean automatic, ODocument metadata) {
public OLuceneClassIndexContext(OSchema schema, OIndexDefinition definition, String name, boolean automatic, ODocument metadata) {
this.definition = definition;
this.name = name;
this.automatic = automatic;
this.metadata = metadata;

OLogManager.instance().info(this, "index definition:: " + definition);
indexClass = ODatabaseRecordThreadLocal.INSTANCE.get().getMetadata().getSchema().getClass(definition.getClassName());

indexClass = schema.getClass(definition.getClassName());

updateFieldToStore(definition);
}
Expand All @@ -47,4 +49,11 @@ private void updateFieldToStore(OIndexDefinition indexDefinition) {
}
}

/**
 * Reports the flag recorded for the given field in {@code fieldsToStore}.
 *
 * @param field name of the field to look up
 * @return {@code true} only when the field is mapped to {@code Boolean.TRUE}; {@code false} when
 *         it is mapped to {@code false}, mapped to {@code null}, or not present at all
 */
public boolean isFieldToStore(String field) {
  // Single map lookup; Boolean.TRUE.equals avoids unboxing a null value (which would throw NPE).
  return Boolean.TRUE.equals(fieldsToStore.get(field));
}

}
@@ -0,0 +1,67 @@
package com.orientechnologies.lucene.engine;

import com.orientechnologies.common.log.OLogManager;
import com.orientechnologies.orient.core.db.ODatabaseDocumentInternal;
import com.orientechnologies.orient.core.record.impl.ODocument;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.store.RAMDirectory;

import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Factory for the Lucene {@link Directory} backing an index.
 * <p>
 * The implementation is chosen from the index metadata: the {@code directory_type} field selects
 * "nio", "mmap" or "ram" ("mmap" when the field is absent), and the {@code directory_path} field
 * replaces the default {@code luceneIndexes} folder placed under the storage directory.
 * <p>
 * Created by frank on 03/03/2016.
 */
public class OLuceneDirectoryFactory {

  /** Default folder, relative to the storage directory, holding the Lucene indexes. */
  public static final String OLUCENE_BASE_DIR = "luceneIndexes";

  /** Metadata field naming the directory implementation to use. */
  public static final String DIRECTORY_TYPE = "directory_type";

  public static final String DIRECTORY_NIO = "nio";
  public static final String DIRECTORY_MMAP = "mmap";
  public static final String DIRECTORY_RAM = "ram";

  /** Metadata field overriding {@link #OLUCENE_BASE_DIR}. */
  public static final String DIRECTORY_PATH = "directory_path";

  /**
   * Creates the directory for the given index.
   *
   * @param database  database owning the index; a storage of type "memory" always gets a {@link RAMDirectory}
   * @param indexName name of the index, used as the last path segment for file based directories
   * @param metadata  index metadata, read for {@link #DIRECTORY_TYPE} and {@link #DIRECTORY_PATH}
   * @return a {@link RAMDirectory} for memory storages or when "ram" is requested, otherwise the
   *         result of the file based creation (which itself falls back to a {@link RAMDirectory})
   */
  public Directory createDirectory(ODatabaseDocumentInternal database, String indexName, ODocument metadata) {

    final String requestedType;
    if (metadata.containsField(DIRECTORY_TYPE)) {
      requestedType = metadata.<String>field(DIRECTORY_TYPE);
    } else {
      requestedType = DIRECTORY_MMAP;
    }

    final boolean memoryStorage = database.getStorage().getType().equals("memory");
    if (memoryStorage || DIRECTORY_RAM.equals(requestedType)) {
      return new RAMDirectory();
    }

    return createDirectory(database, indexName, metadata, requestedType);
  }

  /**
   * Creates a file based directory at {@code <storage directory>/<base path>/<indexName>}.
   * Falls back to a {@link RAMDirectory}, after logging a warning, when the type is neither
   * "nio" nor "mmap" or when the directory cannot be opened.
   */
  private Directory createDirectory(ODatabaseDocumentInternal database, String indexName, ODocument metadata, String luceneType) {

    final String basePath;
    if (metadata.containsField(DIRECTORY_PATH)) {
      basePath = metadata.<String>field(DIRECTORY_PATH);
    } else {
      basePath = OLUCENE_BASE_DIR;
    }

    final String storageDirectory = database.getStorage().getConfiguration().getDirectory();
    final Path indexPath = Paths.get(storageDirectory, basePath, indexName);

    final boolean useNio = DIRECTORY_NIO.equals(luceneType);
    final boolean useMmap = DIRECTORY_MMAP.equals(luceneType);

    if (useNio || useMmap) {
      try {
        return useNio ? new NIOFSDirectory(indexPath) : new MMapDirectory(indexPath);
      } catch (IOException e) {
        OLogManager.instance().error(this, "unable to create Lucene Directory with type " + luceneType, e);
      }
    }

    // Reached for an unrecognised type or after an I/O failure above.
    OLogManager.instance().warn(this, "unable to create Lucene Directory, FALL BACK to ramDir");
    return new RAMDirectory();
  }

}

0 comments on commit 131a1a4

Please sign in to comment.