// CreateBrowseSQLite.java
//
// Author: Mark Triggs <mark@dishevelled.net>
//
import java.io.*;
import java.util.*;
import java.sql.*;
// Note that this version is coming from Solr!
import org.apache.commons.codec.binary.Base64;
/**
 * Loads a headings file into a SQLite database.
 *
 * Each input record is one line terminated by CR LF and has the form
 * "base64-encoded key, separator character U+0001, heading text".  Records
 * are first inserted into the table "all_headings" and then copied, ordered
 * by key, into the indexed table "headings".
 */
public class CreateBrowseSQLite
{
    /** Character (U+0001, written as an octal escape) separating key from heading. */
    private static final String KEY_SEPARATOR = "\1";

    /** Number of queued inserts after which the JDBC batch is flushed. */
    private static final int BATCH_SIZE = 500000;

    /** Connection to the output database; only open while create() is running. */
    private Connection outputDB;


    /**
     * Like BufferedReader#readLine(), but only returns lines ended by a \r\n.
     *
     * A carriage return that is not immediately followed by a line feed is
     * treated as record data and kept in the returned string.
     *
     * @param br reader positioned at the start of a line
     * @return the line without its trailing \r\n, or null once end of input
     *         is reached (trailing data with no \r\n terminator is dropped)
     * @throws IOException if reading from br fails
     */
    private String readCRLFLine(BufferedReader br) throws IOException
    {
        StringBuilder sb = new StringBuilder();
        int ch = br.read();

        while (ch >= 0) {
            if (ch == '\r') {
                // This might either be a carriage return embedded in record
                // data (which we want to preserve) or the first part of the
                // \r\n end of line marker.
                int next = br.read();
                if (next == '\n') {
                    // An end of line.  We're done.
                    return sb.toString();
                }

                // Must have been an embedded carriage return.  Keep it, and
                // re-examine the look-ahead character on the next pass: it
                // may itself be the '\r' of the real terminator (data ending
                // in "\r\r\n") or the EOF marker, neither of which may be
                // appended blindly.
                sb.append('\r');
                ch = next;
            } else {
                sb.append((char) ch);
                ch = br.read();
            }
        }

        // EOF.  Show's over.
        return null;
    }


    /**
     * Reads every record from br and inserts it into all_headings inside a
     * single transaction.  Lines that do not contain the key separator are
     * skipped.
     *
     * @param br reader over the headings file
     * @throws Exception on I/O or database errors
     */
    private void loadHeadings(BufferedReader br)
        throws Exception
    {
        outputDB.setAutoCommit(false);

        PreparedStatement prep = outputDB.prepareStatement(
            "insert or ignore into all_headings (key, heading) values (?, ?)");

        try {
            char separator = KEY_SEPARATOR.charAt(0);
            int pending = 0;
            String line;

            while ((line = readCRLFLine(br)) != null) {
                int sep = line.indexOf(separator);
                if (sep < 0) {
                    // Not a "key SEP heading" record; nothing to insert.
                    continue;
                }

                // Base64 text is plain ASCII; naming the charset keeps the
                // decoding independent of the platform default encoding.
                byte[] key = Base64.decodeBase64(
                    line.substring(0, sep).getBytes("US-ASCII"));

                prep.setBytes(1, key);
                prep.setString(2, line.substring(sep + 1));
                prep.addBatch();
                pending++;

                // Flush periodically so the queued batch does not grow
                // without bound on large inputs.
                if (pending >= BATCH_SIZE) {
                    prep.executeBatch();
                    prep.clearBatch();
                    pending = 0;
                }
            }

            prep.executeBatch();
        } finally {
            prep.close();
        }

        outputDB.commit();
        outputDB.setAutoCommit(true);
    }


    /**
     * Recreates the empty all_headings staging table and switches off
     * SQLite's synchronous writes and journal for this connection.
     *
     * @throws Exception on database errors
     */
    private void setupDatabase()
        throws Exception
    {
        Statement stat = outputDB.createStatement();
        try {
            stat.executeUpdate("drop table if exists all_headings;");
            stat.executeUpdate("create table all_headings (key, heading);");
            stat.executeUpdate("PRAGMA synchronous = OFF;");
            // NOTE(review): execute() rather than executeUpdate() is kept from
            // the original, presumably because this pragma returns a row.
            stat.execute("PRAGMA journal_mode = OFF;");
        } finally {
            stat.close();
        }
    }


    /**
     * Rebuilds the headings table as a copy of all_headings ordered by key
     * and creates an index on its key column.
     *
     * @throws Exception on database errors
     */
    private void buildOrderedTables()
        throws Exception
    {
        Statement stat = outputDB.createStatement();
        try {
            stat.executeUpdate("drop table if exists headings;");
            stat.executeUpdate("create table headings " +
                               "as select * from all_headings order by key;");
            stat.executeUpdate("create index keyindex on headings (key);");
        } finally {
            stat.close();
        }
    }


    /**
     * Builds the browse database at outputPath from headingsFile.
     *
     * The database connection and the input reader are closed before this
     * method returns, whether it succeeds or throws.
     *
     * @param headingsFile path of the input file of CR LF terminated records
     * @param outputPath   path of the SQLite database file to write
     * @throws Exception if the JDBC driver cannot be loaded, or on I/O or
     *                   database errors
     */
    public void create(String headingsFile, String outputPath)
        throws Exception
    {
        Class.forName("org.sqlite.JDBC");
        outputDB = DriverManager.getConnection("jdbc:sqlite:" + outputPath);

        try {
            setupDatabase();

            // NOTE(review): FileReader decodes with the platform default
            // charset; confirm that matches the encoding of the headings file.
            BufferedReader br = new BufferedReader(new FileReader(headingsFile));
            try {
                loadHeadings(br);
            } finally {
                br.close();
            }

            buildOrderedTables();
        } finally {
            outputDB.close();
            outputDB = null;
        }
    }


    /**
     * Command-line entry point.
     *
     * @param args the headings file path followed by the database file path
     * @throws Exception if building the database fails
     */
    public static void main(String[] args)
        throws Exception
    {
        if (args.length != 2) {
            System.err.println
                ("Usage: CreateBrowseSQLite <headings file> <db file>");
            // A usage error is a failure; report it through the exit status.
            System.exit(1);
        }

        CreateBrowseSQLite self = new CreateBrowseSQLite();
        self.create(args[0], args[1]);
    }
}