-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.rs
272 lines (234 loc) · 9.36 KB
/
main.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
// Noblit -- An immutable append-only database
// Copyright 2020 Ruud van Asseldonk
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// A copy of the License has been included in the root of the repository.
//! Build a Noblit database from a "Have I Been Pwned?" file.
//!
//! This example provides a binary that converts the text-based dumps from
//! [Have I Been Pwned][hibp] into a Noblit database, in order to query quickly
//! whether a password is present in the dump.
//!
//! To run this example, first [download][hibp] and extract the text file with
//! pwned SHA-1 hashes from Have I Been Pwned. To convert it into a Noblit
//! database, run this example in “build” mode:
//!
//! ```text
//! $ haveibeenpwned build pwned.ndb pwned-passwords-sha1-ordered-by-count-v5.txt
//! ```
//!
//! Once the database has been built, we can check for the presence of a given
//! password with “check” mode:
//!
//! ```text
//! $ echo -n 'hello' | sha1sum
//! aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d -
//!
//! $ haveibeenpwned check pwned.ndb aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d
//! SHA1 is present with count 253581.
//! ```
//!
//! This example is inspired by [this post][stryku] on stryku.pl.
//!
//! [hibp]: https://haveibeenpwned.com/
//! [stryku]: http://stryku.pl/poetry/okon.php
extern crate noblit;
use std::env;
use std::fs;
use std::io::{BufRead, Write};
use std::io;
use std::process;
use std::str::FromStr;
use noblit::database;
use noblit::datom::Aid;
use noblit::datom::Value;
use noblit::memory_store::{MemoryStore, MemoryHeap};
use noblit::store::{PageSize4096};
use noblit::temp_heap::Temporaries;
/// The in-memory store type, instantiated with the `PageSize4096` page size parameter.
type MemoryStore4096 = MemoryStore<PageSize4096>;
/// The database type used when building and when checking in memory: a
/// `MemoryStore4096` for the store and a `MemoryHeap` for the heap.
type Database = database::Database<MemoryStore4096, MemoryHeap>;
fn init_database() -> (Database, Schema) {
let store: MemoryStore4096 = MemoryStore::new();
let heap = MemoryHeap::new();
let mut db = Database::new(store, heap).unwrap();
let schema = assert_schema(&mut db);
(db, schema)
}
/// The attribute ids of the two attributes that `assert_schema` defines.
struct Schema {
/// Attribute id of `pw.sha1`, which is declared with type bytes and as unique.
pw_sha1: Aid,
/// Attribute id of `pw.count`, which is declared with type uint64.
pw_count: Aid,
}
/// One parsed input record: a SHA-1 hash and the count that goes with it.
struct Password {
/// The 20 raw bytes of the hash, as returned by `parse_sha1`.
sha1: [u8; 20],
/// The count parsed from the part of the input line after the colon.
count: u64,
}
/// Define the two password attributes in `db` and return their attribute ids.
///
/// A single transaction creates `pw.sha1` (type bytes, unique, not many) and
/// `pw.count` (type uint64, not unique, not many). Panics if the commit fails.
fn assert_schema(db: &mut Database) -> Schema {
    // Read the ids of the built-in attributes and types that are needed to
    // describe a new attribute.
    let attr_name = db.builtins.attribute_db_attribute_name;
    let attr_type = db.builtins.attribute_db_attribute_type;
    let attr_unique = db.builtins.attribute_db_attribute_unique;
    let attr_many = db.builtins.attribute_db_attribute_many;
    let type_bytes = db.builtins.entity_db_type_bytes;
    let type_uint64 = db.builtins.entity_db_type_uint64;

    // Everything below goes into one schema transaction.
    let mut transaction = db.begin();
    let mut temporaries = Temporaries::new();

    // Attribute 1 of 2 -- pw.sha1: bytes.
    let sha1_entity = transaction.create_entity();
    transaction.assert(sha1_entity, attr_name, Value::from_str("pw.sha1", &mut temporaries));
    transaction.assert(sha1_entity, attr_type, Value::from_eid(type_bytes));
    transaction.assert(sha1_entity, attr_unique, Value::from_bool(true));
    transaction.assert(sha1_entity, attr_many, Value::from_bool(false));

    // Attribute 2 of 2 -- pw.count: uint64.
    let count_entity = transaction.create_entity();
    transaction.assert(count_entity, attr_name, Value::from_str("pw.count", &mut temporaries));
    transaction.assert(count_entity, attr_type, Value::from_eid(type_uint64));
    transaction.assert(count_entity, attr_unique, Value::from_bool(false));
    transaction.assert(count_entity, attr_many, Value::from_bool(false));

    db.commit(&temporaries, transaction).expect("TODO: Good Result types.");

    // The attribute id wraps the same number as the id of the defining entity.
    Schema {
        pw_sha1: Aid(sha1_entity.0),
        pw_count: Aid(count_entity.0),
    }
}
/// Store every password in `pws` in the database, in a single transaction.
///
/// Each password becomes a new entity with a `pw.sha1` and a `pw.count`
/// datom. The vector is drained, so `pws` is empty afterwards. Panics if the
/// commit fails.
fn insert_batch(db: &mut Database, schema: &Schema, pws: &mut Vec<Password>) {
    let mut transaction = db.begin();
    let mut temporaries = Temporaries::new();

    pws.drain(..).for_each(|password| {
        let entity = transaction.create_entity();
        // The hash bytes are registered as a temporary constant and referenced
        // by id; the count is stored inline in the value.
        let hash_id = temporaries.push_bytes(Box::new(password.sha1));
        transaction.assert(entity, schema.pw_sha1, Value::from_const_bytes(hash_id));
        transaction.assert(entity, schema.pw_count, Value::from_u64_inline(password.count));
    });

    db.commit(&temporaries, transaction).expect("TODO: Good Result types.");
}
/// Query `db` for the given SHA-1 and print the outcome to stdout.
///
/// Prints either that the hash is absent, or the count stored with it.
/// Panics if the query contains a type error, or if the hash is found more
/// than once.
fn check_password<Store, Heap>(db: &database::Database<Store, Heap>, sha1: &[u8; 20])
where
    Store: noblit::store::Store,
    Heap: noblit::heap::Heap,
{
    use noblit::query::{Query, Statement, Var};

    // Register the constants that the query refers to: the two attribute
    // names, and the hash that we are looking for.
    let mut tmps = Temporaries::new();
    let name_sha1 = tmps.push_string(String::from("pw.sha1"));
    let name_count = tmps.push_string(String::from("pw.count"));
    let needle = tmps.push_bytes(Box::new(*sha1));

    let mut view = db.view(tmps);

    // Encode this query:
    // TODO: Add a more user-friendly way of doing this.
    //
    //   where
    //     pw pw.sha1 <sha1>
    //     pw pw.count c
    //   select
    //     c
    //
    // Variable 0 is "pw", variable 1 is "c".
    let variable_names = ["pw", "c"].iter().map(|name| name.to_string()).collect();
    let where_statements = vec![
        Statement::named_const(Var(0), name_sha1, Value::from_const_bytes(needle)),
        Statement::named_var(Var(0), name_count, Var(1)),
    ];
    let mut query = Query {
        variable_names: variable_names,
        where_statements: where_statements,
        select: vec![Var(1)],
    };

    query.fix_attributes(&mut view);
    query.infer_types(&view).expect("Query contains a type error.");

    let plan = noblit::planner::Planner::plan(&query);
    let matches: Vec<_> = noblit::eval::Evaluator::new(&plan, &view).collect();

    if matches.is_empty() {
        println!("SHA1 is not present in the database.");
    } else if matches.len() == 1 {
        println!("SHA1 is present with count {}.", matches[0][0].as_u64(view.heap()));
    } else {
        panic!("SHA1 was found more than once.");
    }

    // TODO: Print timing information.
}
/// Parse a hexadecimal SHA-1 hash, or crash.
///
/// `sha1_hex` must consist of exactly 40 ASCII hexadecimal digits; both upper
/// and lower case are accepted. Returns the 20 bytes that the digits encode,
/// in the order in which they appear.
///
/// # Panics
///
/// Panics if the input is not 40 bytes long, or if it contains anything other
/// than ASCII hexadecimal digits.
fn parse_sha1(sha1_hex: &str) -> [u8; 20] {
    assert_eq!(sha1_hex.len(), 40);

    // Validate up front instead of relying on `from_str_radix` alone: that
    // function also accepts a leading `+` sign, so "+a" would parse as 0x0a.
    // Checking for ASCII here also guarantees that the two-byte slices below
    // fall on character boundaries.
    assert!(
        sha1_hex.bytes().all(|b| b.is_ascii_hexdigit()),
        "Expected hexadecimal SHA1 hash."
    );

    let mut sha1 = [0; 20];
    for (i, byte) in sha1.iter_mut().enumerate() {
        let byte_str = &sha1_hex[i * 2..i * 2 + 2];
        *byte = u8::from_str_radix(byte_str, 16).expect("Expected hexadecimal SHA1 hash.");
    }
    sha1
}
/// Print the command-line usage summary to stdout, one line per entry.
fn print_usage() {
    let usage_lines = [
        "Usage:",
        " haveibeenpwned build out.ndb pwned-passwords-sha1.txt",
        " haveibeenpwned check out.ndb <pw-sha1-hex>",
    ];
    for usage_line in usage_lines.iter() {
        println!("{}", usage_line);
    }
}
/// Entry point: run the command named by the first argument.
///
/// * `build <db> <txt>` parses the `<sha1>:<count>` lines of `<txt>`, inserts
///   them into an in-memory database in batches, and writes the packed
///   database to `<db>`.
/// * `check <db> <sha1-hex>` memory-maps `<db>` and looks up the hash.
/// * `check-inmem <db> <sha1-hex>` reads `<db>` fully into memory first, and
///   then looks up the hash.
///
/// Prints the usage and exits with status 1 when there are fewer than three
/// arguments or when the command is unknown. I/O failures and malformed input
/// cause a panic.
fn main() {
    if env::args().len() < 4 {
        print_usage();
        process::exit(1);
    }

    let cmd = env::args().nth(1).unwrap();
    let db_path = env::args().nth(2).unwrap();
    let arg = env::args().nth(3).unwrap();

    match &cmd[..] {
        "build" => {
            let (mut db, schema) = init_database();

            let f = fs::File::open(arg).expect("Failed to open input file.");
            let reader = io::BufReader::new(f);

            let mut batch = Vec::new();
            let mut i_batch = 0;

            for (i, opt_line) in reader.lines().enumerate() {
                let line = opt_line.expect("Failed to read input line.");

                // The lines have the format "<sha1>:<count>", with the sha1 in
                // hexadecimal (40 characters), a colon, and the count in ascii
                // decimal digits. Split on the first colon rather than on a
                // fixed byte offset, so that a short or otherwise malformed
                // line fails with a clear message. `parse_sha1` enforces that
                // the part before the colon is 40 bytes long.
                let mut fields = line.splitn(2, ':');
                let (sha1_hex, count_str) = match (fields.next(), fields.next()) {
                    (Some(sha1_hex), Some(count_str)) => (sha1_hex, count_str),
                    _ => panic!("Expected input line of the form <sha1>:<count>."),
                };

                let pw = Password {
                    sha1: parse_sha1(sha1_hex),
                    count: u64::from_str(count_str).expect("Failed to parse count."),
                };
                batch.push(pw);

                if batch.len() >= 100_000 {
                    insert_batch(&mut db, &schema, &mut batch);
                    i_batch += 1;
                    print!("\rInserted {} batches, {} passwords.", i_batch, i + 1);
                    io::stdout().flush().unwrap();
                }

                if db.get_store().as_bytes().len() >= 1_000_000_000 {
                    println!("");
                    println!("Stopping after {} passwords, store grew larger than 1G.", i + 1);
                    break
                }
            }

            // The loop only inserts full batches. Insert what is left over, so
            // the final passwords of the input (or all of them, for an input
            // smaller than one batch) are not silently dropped.
            if !batch.is_empty() {
                insert_batch(&mut db, &schema, &mut batch);
            }

            let f = fs::File::create(db_path).expect("Failed to open output file.");
            let mut writer = io::BufWriter::new(f);
            noblit::disk::write_packed(&db, &mut writer).expect("Failed to write databse.");
            // Flush explicitly: dropping a `BufWriter` also flushes it, but
            // any error that occurs during that flush is ignored.
            writer.flush().expect("Failed to flush output file.");
        }
        // For the check command, by default we use the mmap version, which is
        // fast, and works even if the database does not fit in RAM, but which
        // can be unsafe depending on how you use it.
        "check" => {
            let needle = parse_sha1(&arg[..]);
            let f = fs::File::open(db_path).expect("Failed to open input database.");
            let db = noblit::disk::mmap_packed(&f).expect("Failed to read databse.");
            check_password(&db, &needle);
        }
        // As a sanity check, we also offer the check against an in-memory
        // database which first loads the entire database into memory.
        "check-inmem" => {
            let needle = parse_sha1(&arg[..]);
            let f = fs::File::open(db_path).expect("Failed to open input database.");
            let mut reader = io::BufReader::new(f);
            let db: Database =
                noblit::disk::read_packed(&mut reader).expect("Failed to read databse.");
            check_password(&db, &needle);
        }
        _ => {
            print_usage();
            process::exit(1);
        }
    }
}