-
Notifications
You must be signed in to change notification settings - Fork 12
/
main.rs
101 lines (87 loc) · 2.88 KB
/
main.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
extern crate clap;
use std::collections::{HashSet, HashMap, hash_map, hash_map::DefaultHasher};
use std::hash::{Hasher};
use std::io::{stdin, BufRead, BufReader, stdout, Write, BufWriter};
use std::slice;
use clap::{Arg, App};
use anyhow::Result;
fn count_cmd(delim: u8) -> Result<()> {
let mut set = HashMap::<Vec<u8>, u64>::new();
for line in BufReader::new(stdin().lock()).split(delim) {
match set.entry(line?) {
hash_map::Entry::Occupied(mut e) => { *e.get_mut() += 1; },
hash_map::Entry::Vacant(e) => { e.insert(1); }
}
}
let out = stdout();
let mut out = BufWriter::new(out.lock());
for (line, count) in set.iter() {
write!(out, "{} ", count)?;
out.write(&line)?;
out.write(slice::from_ref(&delim))?;
}
Ok(())
}
/// Computes the 64-bit digest of `v` using a freshly created `DefaultHasher`.
///
/// A new hasher is built on every call, so equal values produce equal
/// digests for the lifetime of the process.
fn hash<T>(v: &T) -> u64
where
    T: std::hash::Hash,
{
    let mut hasher = DefaultHasher::new();
    std::hash::Hash::hash(v, &mut hasher);
    hasher.finish()
}
/// Copies stdin to stdout, dropping records that were already seen.
///
/// Stdin is split on the byte `delim`. Each record is written (followed by
/// `delim`) the first time its 64-bit hash is encountered; later records
/// with the same hash are skipped. Output therefore keeps the input order.
///
/// Only the hashes are remembered, not the records themselves, so two
/// distinct records whose hashes collide are treated as duplicates.
///
/// # Errors
///
/// Returns any I/O error raised while reading stdin or while writing to or
/// flushing stdout.
fn uniq_cmd(delim: u8) -> Result<()> {
    let out = stdout();
    let mut out = BufWriter::new(out.lock());
    let mut seen = HashSet::<u64>::new();
    for line in BufReader::new(stdin().lock()).split(delim) {
        let line = line?;
        // `insert` returns true only when the hash was not present before.
        if seen.insert(hash(&line)) {
            // `write` is allowed to stop after a partial write; `write_all`
            // keeps going until the whole record has been handed over.
            out.write_all(&line)?;
            out.write_all(slice::from_ref(&delim))?;
        }
    }
    // Dropping a `BufWriter` discards flush errors, so flush explicitly.
    out.flush()?;
    Ok(())
}
/// Parses the command line and runs the requested mode.
///
/// Recognised options:
/// * `-c` / `--count`: print every distinct record with its number of
///   occurrences (`count_cmd`) instead of just de-duplicating (`uniq_cmd`).
/// * `-d` / `--delimiter` / `--delim <byte>`: single-byte record delimiter,
///   defaulting to a newline.
/// * `-0` / `--null`: use the NUL byte as delimiter; conflicts with `-d`.
///
/// # Errors
///
/// Returns the clap error when argument parsing does not yield matches, and
/// otherwise whatever error the selected command returns.
fn try_main() -> Result<()> {
    let mut argspec = App::new("huniq")
        .version("2.0.3")
        .about("Remove duplicates from stdin, using a hash table")
        .author("Karolin Varner <karo@cupdev.net>")
        .arg(Arg::with_name("count")
            .help("Output the amount of times a line was encountered")
            .long("count")
            .short("c"))
        .arg(Arg::with_name("delimiter")
            // Escaped so the help shows the two characters `\n` rather than
            // breaking the line in the middle of the sentence.
            .help("Which delimiter between elements to use. By default `\\n` is used")
            .long("delimiter")
            // Calling `.long()` twice keeps only the last name, so the short
            // spelling is registered as an alias to keep both usable.
            .visible_alias("delim")
            .short("d")
            .takes_value(true)
            .default_value("\n")
            .validator(|v| match v.len() {
                1 => Ok(()),
                // `\\x00` keeps the sed escape readable; an unescaped `\x00`
                // would embed literal NUL bytes in the message.
                _ => Err(String::from("\
                    Only ascii characters are supported as delimiters. \
                    Use sed to turn your delimiter into zero bytes?\n\
                    $ echo -n \"1λ1λ2λ3\" | sed 's@λ@\\x00@g' | huniq -0 | sed 's@\\x00@λ@g'\n\
                    1λ2λ3λ"
                ))
            }))
        .arg(Arg::with_name("null")
            .help("Use the \\0 character as the record delimiter.")
            .long("null")
            .short("0")
            .conflicts_with("delimiter"));

    let args = argspec.get_matches_from_safe_borrow(&mut std::env::args_os())?;

    let delim = if args.is_present("null") {
        b'\0'
    } else {
        args.value_of("delimiter")
            .and_then(|v| v.bytes().next())
            .expect("delimiter has a default value and is validated to be exactly one byte")
    };

    if args.is_present("count") {
        count_cmd(delim)
    } else {
        uniq_cmd(delim)
    }
}
fn main() {
if let Err(er) = try_main() {
println!("{}", er);
}
}