/
Indexer.java
80 lines (69 loc) · 3.24 KB
/
Indexer.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
package com.mycompany.luceneexam;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.gosen.GosenAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Command-line tool that builds a Lucene index from the text files found under
 * {@code <user.home>/Documents/novels} and writes it to
 * {@code <user.home>/Documents/index}.
 *
 * <p>Each indexed file is read as Windows-31J text. Its first line is stored as the
 * {@code title} field, its second line as the {@code author} field, and the remainder
 * is indexed as the {@code contents} field.
 */
public class Indexer {

    /**
     * Rebuilds the index from scratch (any existing index at the target is overwritten).
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        File home = new File(System.getProperty("user.home"));
        // Output location of the index. The Directory is opened in try-with-resources
        // so that it is released even when indexing fails part-way through.
        try (Directory indexDir = FSDirectory.open(new File(home, "Documents/index").toPath())) {
            // Analyzer that decides how text is tokenized. The original author noted
            // StandardAnalyzer as the alternative to use for English text.
            Analyzer analyzer = new GosenAnalyzer();
            IndexWriterConfig config = new IndexWriterConfig(analyzer);
            // OpenMode.CREATE: create a new index, overwriting one that already exists.
            config.setOpenMode(OpenMode.CREATE);
            try (IndexWriter writer = new IndexWriter(indexDir, config)) {
                File root = new File(home, "Documents/novels");
                gatherDocs(writer, root);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively walks {@code parent} and adds every {@code *.txt} file to the index.
     *
     * <p>Sub-directories are always descended into. Regular files whose name starts with
     * {@code "."} or does not end with {@code ".txt"} are ignored.
     *
     * @param writer index writer that receives one document per text file
     * @param parent directory to scan
     * @throws IOException declared for callers; per-file failures are reported and skipped
     */
    private static void gatherDocs(IndexWriter writer, File parent) throws IOException {
        System.out.println(parent.getAbsolutePath());
        // listFiles() returns null (not an empty array) when the path is not a
        // directory or cannot be read, so it must be checked before iterating.
        File[] children = parent.listFiles();
        if (children == null) {
            System.out.format("SKIP:%s\n", parent.getAbsolutePath());
            return;
        }
        for (File child : children) {
            if (child.isDirectory()) {
                gatherDocs(writer, child);
                continue;
            }
            String name = child.getName();
            if (name.startsWith(".")) {
                continue;
            }
            // Require the dot so that names such as "footxt" are not mistaken for text files.
            if (name.endsWith(".txt")) {
                indexFile(writer, child);
            }
        }
    }

    /**
     * Reads one text file and adds it to the index as a single document.
     *
     * <p>Indexing is best-effort: a file that cannot be read or indexed is reported on
     * standard output and skipped so that the remaining files are still processed.
     *
     * @param writer index writer that receives the document
     * @param file   text file whose first two lines are the title and the author
     */
    private static void indexFile(IndexWriter writer, File file) {
        System.out.format("FILE:%s\n", file.getName());
        try (BufferedReader br = Files.newBufferedReader(file.toPath(), Charset.forName("Windows-31J"))) {
            String title = br.readLine();
            String author = br.readLine();
            // readLine() returns null at end of input; a file with fewer than two lines
            // has no usable metadata, so skip it instead of passing null to StringField.
            if (title == null || author == null) {
                System.out.format("読み込み失敗: %s (title/author line missing)\n", file.getAbsolutePath());
                return;
            }
            System.out.format("%s %s\n", title, author);
            // Populate the Document with the per-file information stored in the index.
            Document doc = new Document();
            doc.add(new StringField("author", author, Store.YES));
            doc.add(new StringField("title", title, Store.YES));
            // The rest of the reader (everything after the first two lines) is the body.
            doc.add(new TextField("contents", br));
            // Write the document to the index; the reader is consumed during this call.
            writer.addDocument(doc);
        } catch (IOException | RuntimeException e) {
            // Keep going with the other files, but say which file failed and why.
            System.out.format("読み込み失敗: %s (%s)\n", file.getAbsolutePath(), e);
        }
    }
}