Skip to content
Permalink
Browse files

Update to 0.0.2, add flags, refactor visitors.

  • Loading branch information...
mikispag committed Nov 17, 2018
1 parent 1b098a5 commit 243a5acdeb939ad2c3872df8f1751c5574678cb2
Showing with 87 additions and 54 deletions.
  1. +2 −1 Cargo.toml
  2. +0 −1 rustfmt.toml
  3. +3 −7 src/blockchain.rs
  4. +59 −6 src/main.rs
  5. +2 −3 src/preamble.rs
  6. +9 −17 src/visitors/clusterizer.rs
  7. +12 −19 src/visitors/dump_balances.rs
@@ -1,13 +1,14 @@
[package]
name = "bitiodine-rust"
version = "0.0.1"
version = "0.0.2"
authors = ["Michele Spagnuolo <mikispag@gmail.com>"]

[dependencies]
arrayref = ">=0.3"
bitcoin-bech32 = ">=0.8"
byteorder = ">=1.2"
base58 = ">=0.1"
clap = ">=2.32.0"
dirs = ">=1.0"
rust-crypto = ">=0.2"
log = ">=0.4"

This file was deleted.

@@ -2,7 +2,6 @@ extern crate dirs;

use memmap::Mmap;
use preamble::*;
use std::fs::File;

#[derive(PartialEq, Eq, Debug, Copy, Clone)]
struct InitIndexEntry<'a> {
@@ -15,16 +14,13 @@ pub struct BlockChain {
}

impl BlockChain {
pub unsafe fn read() -> BlockChain {
pub unsafe fn read(blocks_dir: &str) -> BlockChain {
let mut maps: Vec<Mmap> = Vec::new();
let mut n: usize = 0;
let blocks_dir = dirs::home_dir()
.expect("Unable to get the home directory!")
.join(".bitcoin")
.join("blocks");
let blocks_dir_path = PathBuf::from(blocks_dir);

loop {
match File::open(blocks_dir.join(format!("blk{:05}.dat", n))) {
match File::open(blocks_dir_path.join(format!("blk{:05}.dat", n))) {
Ok(f) => {
n += 1;
match Mmap::map(&f) {
@@ -3,6 +3,7 @@ extern crate arrayref;
extern crate base58;
extern crate bitcoin_bech32;
extern crate byteorder;
extern crate clap;
extern crate crypto;
extern crate memmap;
extern crate rustc_serialize;
@@ -32,23 +33,27 @@ mod transactions;
pub mod visitors;

use blockchain::BlockChain;
use clap::{App, Arg};
use env_logger::Builder;
use log::LevelFilter;
use visitors::clusterizer::Clusterizer;
use visitors::BlockChainVisitor;
//use visitors::dump_balances::DumpBalances;
//use visitors::dump_tx_hashes::DumpTxHashes;

use std::io::Write;
use std::fs::File;
use std::io::{LineWriter, Write};

pub use address::Address;
pub use hash::Hash;
pub use header::BlockHeader;
pub use script::HighLevel;

fn initialize_logger() {
const VERSION: &'static str = env!("CARGO_PKG_VERSION");

fn initialize_logger(level_filter: LevelFilter) {
Builder::new()
.filter(None, LevelFilter::Info)
.filter(None, level_filter)
.format(|buf, record| {
let t = time::now();
writeln!(
@@ -64,8 +69,47 @@ fn initialize_logger() {
}

fn main() {
initialize_logger();
let chain = unsafe { BlockChain::read() };
let default_blocks_dir = dirs::home_dir()
.expect("Unable to get the home directory!")
.join(".bitcoin")
.join("blocks")
.into_os_string()
.into_string()
.expect("Unable to build a default bitcoind blocks directory!");

let matches = App::new("BitIodine")
.version(VERSION)
.author("Michele Spagnuolo <mikispag@gmail.com>")
.about("A Rust Bitcoin blockchain parser with clustering capabilities, allowing to group together addresses in ownership clusters.")
.arg(Arg::with_name("blocks_dir")
.help("Sets the path to the bitcoind blocks directory")
.long("blocks-dir")
.short("b")
.takes_value(true)
.value_name("BLOCKS_DIRECTORY_PATH")
.default_value(&default_blocks_dir))
.arg(Arg::with_name("output")
.help("Sets the path to the output clusters.csv file")
.long("output")
.short("o")
.takes_value(true)
.value_name("OUTPUT_FILE")
.default_value("clusters.csv"))
.arg(Arg::with_name("v")
.short("v")
.multiple(true)
.help("Sets the level of verbosity"))
.get_matches();

let level_filter: LevelFilter;
match matches.occurrences_of("v") {
0 => level_filter = LevelFilter::Info,
1 => level_filter = LevelFilter::Debug,
2 | _ => level_filter = LevelFilter::Off,
}
initialize_logger(level_filter);

let chain = unsafe { BlockChain::read(matches.value_of("blocks_dir").unwrap()) };

/*
let (_, _, _) = chain
@@ -75,7 +119,16 @@ fn main() {

let mut clusterizer_visitor = Clusterizer::new();
let (_, _, _) = chain.walk(&mut clusterizer_visitor).unwrap();
let _clusters_count = clusterizer_visitor.done();
let (_clusters_count, visitor_output) =
clusterizer_visitor.done().expect("Clusterizer failed!");

let mut writer = LineWriter::new(
File::create(matches.value_of("output").unwrap())
.expect("Unable to create the output file!"),
);
writer
.write_all(visitor_output.as_bytes())
.expect("Unable to write output file!");

/*
let (_, _, map) = chain.walk(&mut visitors::dataoutput_finder::DataOutputFinder).unwrap();
@@ -14,8 +14,7 @@ pub use visitors::BlockChainVisitor;
pub use byteorder::{ByteOrder, LittleEndian, ReadBytesExt};
pub use std::collections::hash_map::Entry as HashEntry;
pub use std::collections::HashMap;
pub use std::fs::{self, File, OpenOptions};
pub use std::io::{LineWriter, Write};
pub use std::path::Path;
pub use std::fs::File;
pub use std::path::{Path, PathBuf};
pub use vec_map::VecMap;
pub use void::Void;
@@ -5,7 +5,6 @@ use std::result;

pub struct Clusterizer {
clusters: DisjointSet<Address>,
writer: LineWriter<File>,
}

/// Tarjan's Union-Find data structure.
@@ -16,6 +15,8 @@ pub struct DisjointSet<T: Clone + Hash + Eq> {
map: HashMap<T, usize>, // Each T entry is mapped onto a usize tag.
}

/// Initial capacity, in bytes, reserved for the CSV output `String` built in `done()`.
///
/// Evaluates to 100 * 234_000_000 = 23_400_000_000 bytes (about 23.4 GB), all of which
/// is requested up front through `String::with_capacity`.
// NOTE(review): presumably sized as ~100 bytes per CSV line for ~234M addresses — confirm
// the intended sizing. The product does not fit in a 32-bit `usize`.
const OUTPUT_STRING_CAPACITY: usize = 100usize * 234000000usize;

impl<T> DisjointSet<T>
where
T: Clone + Hash + Eq,
@@ -130,19 +131,11 @@ impl<'a> BlockChainVisitor<'a> for Clusterizer {
type BlockItem = ();
type TransactionItem = HashSet<Address>;
type OutputItem = Address;
type DoneItem = usize;
type DoneItem = (usize, String);

fn new() -> Self {
Self {
clusters: DisjointSet::new(),
writer: LineWriter::new(
OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(Path::new("clusters.csv.tmp"))
.unwrap(),
),
}
}

@@ -213,16 +206,15 @@ impl<'a> BlockChainVisitor<'a> for Clusterizer {
}
}

fn done(&mut self) -> Result<usize> {
fn done(&mut self) -> Result<(usize, String)> {
self.clusters.finalize();
info!("Exporting {} clusters to CSV...", self.clusters.size());

let mut output_string = String::with_capacity(OUTPUT_STRING_CAPACITY);
for (address, tag) in &self.clusters.map {
self.writer
.write_all(format!("{},{}\n", address, self.clusters.parent[*tag]).as_bytes())?;
output_string.push_str(&format!("{},{}\n", address, self.clusters.parent[*tag]));
}

fs::rename(Path::new("clusters.csv.tmp"), Path::new("clusters.csv"))?;
info!("Exported {} clusters to CSV.", self.clusters.size());
Ok(self.clusters.size())
info!("{} clusters generated.", self.clusters.size());
Ok((self.clusters.size(), output_string))
}
}
@@ -2,26 +2,19 @@ use preamble::*;

pub struct DumpBalances {
balances: HashMap<(Address, Option<Hash160>), i64>,
writer: LineWriter<File>,
}

/// Initial capacity, in bytes (100_000_000, i.e. 100 MB), reserved for the CSV output
/// `String` built in `done()` via `String::with_capacity`.
const OUTPUT_STRING_CAPACITY: usize = 100000000usize;

impl<'a> BlockChainVisitor<'a> for DumpBalances {
type BlockItem = ();
type TransactionItem = ();
type OutputItem = (Address, Option<Hash160>, i64);
type DoneItem = ();
type DoneItem = (usize, String);

fn new() -> Self {
Self {
balances: HashMap::with_capacity(1000000),
writer: LineWriter::new(
OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(Path::new("address_balances.csv"))
.unwrap(),
),
}
}

@@ -106,22 +99,22 @@ impl<'a> BlockChainVisitor<'a> for DumpBalances {
}

fn done(&mut self) -> Result<Self::DoneItem> {
let mut output_string = String::with_capacity(OUTPUT_STRING_CAPACITY);

for (address_tuple, balance) in &self.balances {
if *balance == 0 {
continue;
}
let address = &address_tuple.0;
let hash160 = address_tuple.1.unwrap_or_default();
self.writer.write_all(
format!(
"{:.8},{},{}\n",
balance.to_owned() as f64 * 10f64.powf(-8f64),
hash160,
address
).as_bytes(),
)?;
output_string.push_str(&format!(
"{:.8},{},{}\n",
balance.to_owned() as f64 * 10f64.powf(-8f64),
hash160,
address
));
}

Ok(())
Ok((self.balances.len(), output_string))
}
}

0 comments on commit 243a5ac

Please sign in to comment.
You can’t perform that action at this time.