Skip to content
Browse files

added lucene connector

  • Loading branch information...
1 parent 19825d7 commit 3af161da34e17514e70bc99827bd1b9ec134715c Sun Ning committed
View
36 ansj_seq-solr/pom.xml
@@ -0,0 +1,36 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>info.sunng.segs</groupId>
+ <artifactId>ansjseg-solr</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+
+ <dependencies>
+ <dependency>
+ <groupId>info.sunng.segs</groupId>
+ <artifactId>ansjseg</artifactId>
+ <version>1.0.0-SNAPSHOT</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.solr</groupId>
+ <artifactId>solr-core</artifactId>
+ <version>4.1.0</version>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.3.2</version>
+ <configuration>
+ <source>1.6</source>
+ <target>1.6</target>
+ </configuration>
+ </plugin>
+
+ </plugins>
+ </build>
+
+</project>
View
56 ansj_seq-solr/src/main/java/org/ansj/solr/AnsjTokenizer.java
@@ -0,0 +1,56 @@
+/**
+ *
+ */
+package org.ansj.solr;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.ansj.domain.Term;
+import org.ansj.splitWord.Analysis;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+
+/**
+ * Lucene {@link Tokenizer} that exposes the terms produced by an ansj
+ * {@link Analysis} as a token stream.
+ *
+ * <p>For every term returned by the analysis, the tokenizer fills in the term
+ * text, the start/end offsets and the token type (the term's nature string).
+ *
+ * <p>NOTE(review): the {@link Analysis} is bound once in the constructor and
+ * this class does not override {@code reset()}; whether a reader installed
+ * later for tokenizer reuse is picked up by the analysis should be confirmed.
+ *
+ * @author nsun
+ */
+public class AnsjTokenizer extends Tokenizer {
+
+    private final Analysis ansjAnalysis;
+    private final CharTermAttribute termAtt;
+    private final OffsetAttribute offsetAtt;
+    private final TypeAttribute typeAtt;
+
+    /**
+     * Creates a tokenizer that pulls its terms from the given analysis.
+     *
+     * @param ana the ansj analysis that yields terms via {@code next()}
+     * @param in  the reader handed to the {@link Tokenizer} base class
+     */
+    public AnsjTokenizer(Analysis ana, Reader in) {
+        super(in);
+        this.ansjAnalysis = ana;
+
+        this.termAtt = addAttribute(CharTermAttribute.class);
+        this.offsetAtt = addAttribute(OffsetAttribute.class);
+        this.typeAtt = addAttribute(TypeAttribute.class);
+    }
+
+    /**
+     * Advances to the next term of the underlying analysis.
+     *
+     * @return {@code true} if a token was produced, {@code false} once the
+     *         analysis returns no further term
+     * @throws IOException if the underlying analysis fails while reading
+     * @see org.apache.lucene.analysis.TokenStream#incrementToken()
+     */
+    @Override
+    public boolean incrementToken() throws IOException {
+        clearAttributes();
+
+        Term term = ansjAnalysis.next();
+        if (term == null) {
+            // Exhausted. end() is intentionally not called here: the
+            // TokenStream workflow leaves that call to the consumer.
+            return false;
+        }
+
+        termAtt.append(term.getName());
+        // Map offsets back through any CharFilter wrapped around the input so
+        // they refer to positions in the original, unfiltered text.
+        offsetAtt.setOffset(correctOffset(term.getOffe()), correctOffset(term.getToValue()));
+        typeAtt.setType(term.getNatrue().natureStr);
+        return true;
+    }
+
+}
View
31 ansj_seq-solr/src/main/java/org/ansj/solr/AnsjTokenizerFactory.java
@@ -0,0 +1,31 @@
+/**
+ *
+ */
+package org.ansj.solr;
+
+import java.io.Reader;
+
+import org.ansj.splitWord.Analysis;
+import org.ansj.splitWord.analysis.ToAnalysis;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+
+/**
+ * {@link TokenizerFactory} that hands out {@link AnsjTokenizer} instances
+ * backed by a {@link ToAnalysis}.
+ *
+ * @author nsun
+ */
+public class AnsjTokenizerFactory extends TokenizerFactory {
+
+    /**
+     * Builds a tokenizer over the given reader.
+     *
+     * @param in character source passed both to the {@link ToAnalysis} and to
+     *           the tokenizer itself
+     * @return a new {@link AnsjTokenizer} wrapping a {@link ToAnalysis} over
+     *         {@code in}
+     * @see org.apache.lucene.analysis.util.TokenizerFactory#create(java.io.Reader)
+     */
+    @Override
+    public Tokenizer create(Reader in) {
+        return new AnsjTokenizer(new ToAnalysis(in), in);
+    }
+
+}

0 comments on commit 3af161d

Please sign in to comment.
Something went wrong with that request. Please try again.