Skip to content

Commit

Permalink
updated
Browse files Browse the repository at this point in the history
  • Loading branch information
simsong committed Oct 6, 2014
1 parent 04c028a commit 6f800ec
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 52 deletions.
2 changes: 1 addition & 1 deletion configure.ac
Expand Up @@ -150,7 +150,7 @@ AM_CONDITIONAL([LIGHTGREP_ENABLED], [test "yes" = "$lightgrep"])
AC_ARG_ENABLE([flexscanners],
AS_HELP_STRING([--disable-flexscanners], [disable FLEX-based scanners]),
[],
[AC_DEFINE(FLEXSCANNERS_ENABLED, 1, [Use FLEX-based scanners]), flexscanners='yes'])
[AC_DEFINE(USE_FLEXSCANNERS, 1, [Use FLEX-based scanners]) flexscanners='yes'])
AM_CONDITIONAL([FLEXSCANNERS_ENABLED], [test "yes" = "$flexscanners"])


Expand Down
4 changes: 1 addition & 3 deletions python/bulk_extractor_reader.py
Expand Up @@ -230,7 +230,6 @@ def peak_memory(self):

def open(self,fname,mode='r'):
"""Opens a named file in the bulk report. Default is text mode.
Returns .bulk_extractor_reader as a pointer to self.
"""
# zipfile always opens in Binary mode, but generates an error
# if the 'b' is present, so remove it if present.
Expand All @@ -241,7 +240,6 @@ def open(self,fname,mode='r'):
mode = mode.replace("b","")+"b"
fn = os.path.join(self.dname,fname)
f = open(fn,mode=mode)
f.bulk_extractor_reader = self
return f

def count_lines(self,fname):
Expand Down Expand Up @@ -308,7 +306,7 @@ def read_histogram(self,fn):
def read_features(self,fname):
"""Just read the features out of a feature file"""
"""Usage: for (pos,feature,context) in br.read_features("fname")"""
for line in self.open(fname):
for line in self.open(fname,"rb"):
r = parse_feature_line(line)
if r:
yield r
Expand Down
15 changes: 1 addition & 14 deletions python/cda2_tool.py
Expand Up @@ -2,21 +2,8 @@
# coding=UTF-8
#
# Cross Drive Analysis tool for bulk extractor.
# V4
# Features of this program:
# --netmap -- makes a map of which computers exchanged packets (ethernet maps)
# --makestop -- Creates a stoplist of features that are on more than a fraction of the drives
# --threshold -- sets the fraction of drives necessary for a feature to be ignored
# --idfeatures -- specifies which feature files are used for identity operations
#
# reads multiple bulk_extractor histogram files and outputs:
# stoplist.txt - list of email addresses on more than 1/3 of the disks
# targets.txt - list of email addresses not on stoplist and the # of drives on which they appear.
#
# Version 1.3 - Complete rewrite; eliminates driveids and featureids, since strings
# in Python are hashable (and thus integers). Also uses bulk_extractor_reader

__version__='1.3.1'
__version__='2.0.0'
import os.path,sys

#if sys.version_info < (3,2):
Expand Down
7 changes: 5 additions & 2 deletions src/Makefile.am
Expand Up @@ -77,8 +77,11 @@ bulk_scanners = \
scan_winprefetch.cpp \
scan_wordlist.cpp \
scan_xor.cpp \
scan_zip.cpp \
$(flex_scanners)
scan_zip.cpp

if FLEXSCANNERS_ENABLED
bulk_scanners += $(flex_scanners)
endif

if LIGHTGREP_ENABLED
bulk_scanners += $(lightgrep_scanners)
Expand Down
2 changes: 1 addition & 1 deletion src/be13_api
Submodule be13_api updated from 1fdb96 to f88ae1
10 changes: 6 additions & 4 deletions src/bulk_extractor_scanners.cpp
Expand Up @@ -37,18 +37,20 @@

/* An array of the built-in scanners */
scanner_t *scanners_builtin[] = {
scan_accts,
scan_base16,
scan_base64,
scan_kml,
scan_email,
scan_httplogs,
scan_gps,
scan_net,
scan_find,
scan_wordlist,
scan_aes,
scan_json,
#if defined(USE_FLEXSCANNERS)
scan_accts,
scan_base16,
scan_email,
scan_gps,
#endif
#if defined(HAVE_LIBLIGHTGREP) && defined(USE_LIGHTGREP)
scan_accts_lg,
scan_base16_lg,
Expand Down
17 changes: 17 additions & 0 deletions src/scan_accts.flex
Expand Up @@ -249,6 +249,23 @@ DATEFORMAT ({DATEA}|{DATEB}|{DATEC}|{DATED})
s.pos += yyleng;
}

[ \t\n][+][1-9]([0-9]{10,18})/[^0-9] {
/* Experimental Regex to find phone numbers beginning with a + */
/* Phone numbers can be a maximum of 15 digits */
accts_scanner &s = *yyaccts_get_extra(yyscanner);
s.telephone_recorder->write_buf(SBUF,s.pos+1,yyleng-1);
s.pos += yyleng;
}

[ \t\n][0]([1-9][0-9]{9,15})/[^0-9] {
/* Experimental Regex to find phone numbers beginning with a 0 */
accts_scanner &s = *yyaccts_get_extra(yyscanner);
s.telephone_recorder->write_buf(SBUF,s.pos+1,yyleng-1);
s.pos += yyleng;
}



[^0-9]([0-9]{6}-){7}([0-9]{6})/[\r\n] {
accts_scanner &s = *yyaccts_get_extra(yyscanner);
s.alert_recorder->write(SBUF.pos0+s.pos,yytext+1,"Possible BitLocker Recovery Key (ASCII).");
Expand Down
103 changes: 77 additions & 26 deletions src/scan_hashdb.cpp
Expand Up @@ -53,13 +53,6 @@ static uint32_t hashdb_import_max_duplicates=0; // import only
enum mode_type_t {MODE_NONE, MODE_SCAN, MODE_IMPORT};
static mode_type_t mode = MODE_NONE;

// internal helper functions
static void do_import(const class scanner_params &sp,
const recursion_control_block &rcb);
static void do_scan(const class scanner_params &sp,
const recursion_control_block &rcb);
inline bool is_empty_block(const uint8_t *buf);

// global state

// hashdb directory, import only
Expand All @@ -73,6 +66,59 @@ typedef md5_generator hash_generator;
typedef hashdb_t__<hash_t> hashdb_t;
hashdb_t* hashdb;

static void do_import(const class scanner_params &sp,
const recursion_control_block &rcb);
static void do_scan(const class scanner_params &sp,
const recursion_control_block &rcb);


// rules for determining if a sector should be ignored
// Heuristic test for a "ramp" sector: counts adjacent 32-bit words where
// word[i] + 1 == word[i+4], i.e. an incrementing sequence (typical of
// filesystem allocation structures rather than meaningful content).
// Returns true when more than hashdb_block_size/8 such pairs are found.
static bool ramp_sector(const sbuf_t &sbuf)
{
    // Guard: sbuf.pagesize is unsigned, so pagesize-8 would underflow to a
    // huge value for tiny buffers and the loop would read out of bounds.
    if (sbuf.pagesize < 8) return false;
    uint32_t count = 0;
    for(size_t i=0;i<sbuf.pagesize-8;i+=4){
        if (sbuf.get32u(i)+1 == sbuf.get32u(i+4)) {
            count += 1;
        }
    }
    return count > hashdb_block_size/8;
}

// Heuristic test for low-entropy sectors via a histogram of big-endian
// 32-bit words. Returns true (sector should be ignored) when fewer than
// three distinct word values appear, or when any single value accounts
// for more than hashdb_block_size/16 of the words.
static bool hist_sector(const sbuf_t &sbuf)
{
    // Guard: pagesize is unsigned; pagesize-4 underflows for buffers of
    // fewer than 4 bytes. Such a buffer yields an empty histogram, which
    // the size()<3 rule classifies as ignorable — return true directly.
    if (sbuf.pagesize < 4) return true;
    std::map<uint32_t,uint32_t> hist;
    for(size_t i=0;i<sbuf.pagesize-4;i+=4){
        hist[sbuf.get32uBE(i)] += 1;
    }
    if (hist.size() < 3) return true;
    for (std::map<uint32_t,uint32_t>::const_iterator it = hist.begin();it != hist.end(); it++){
        if ((it->second) > hashdb_block_size/16){
            return true;
        }
    }
    return false;
}

// Returns true when every byte of the sector is ASCII whitespace.
static bool whitespace_sector(const sbuf_t &sbuf)
{
    for(size_t i=0;i<sbuf.pagesize;i++){
        // Cast to unsigned char: isspace() is undefined for negative values
        // other than EOF. Harmless if sbuf_t::operator[] already returns
        // uint8_t — TODO(review): confirm the element type in sbuf_t.
        if (!isspace(static_cast<unsigned char>(sbuf[i]))) return false;
    }
    return true;
}

// detect if block is empty
// Returns true when the block consists of a single repeated fill byte,
// i.e. every byte of the hashdb_block_size-byte block equals buf[0].
inline bool is_empty_block(const uint8_t *buf) {
    const uint8_t fill = buf[0];
    size_t idx = 1;
    while (idx < hashdb_block_size) {
        if (buf[idx] != fill) {
            return false;
        }
        ++idx;
    }
    return true;
}



extern "C"
void scan_hashdb(const class scanner_params &sp,
const recursion_control_block &rcb) {
Expand Down Expand Up @@ -285,7 +331,7 @@ void scan_hashdb(const class scanner_params &sp,
case scanner_params::PHASE_SCAN: {
switch(mode) {
case MODE_IMPORT:
do_import(sp, rcb);
do_import(sp, rcb);
return;

case MODE_SCAN:
Expand Down Expand Up @@ -333,6 +379,8 @@ static void do_import(const class scanner_params &sp,
}

// get count of blocks to process
// BRUCE - This is not a very efficient way to divide...

size_t count = sbuf.bufsize / hashdb_import_sector_size;
while ((count * hashdb_import_sector_size) +
(hashdb_block_size - hashdb_import_sector_size) > sbuf.pagesize) {
Expand All @@ -344,6 +392,11 @@ static void do_import(const class scanner_params &sp,
new std::vector<hashdb_t::import_element_t>;

// compose the filename based on the forensic path
// BRUCE - I don't like the way that "4" is hard-coded. I know that's the length of
// the unicode character, but hard-coding magic numbers is not good. If we need to do this,
// there should be methods in sbuf_t for separating the filename from the path.
// And what is a "map file delimiter" ?

std::string path_without_map_file_delimiter =
(sbuf.pos0.path.size() > 4) ?
std::string(sbuf.pos0.path, 0, sbuf.pos0.path.size() - 4) : "";
Expand All @@ -369,9 +422,9 @@ static void do_import(const class scanner_params &sp,
size_t offset = i * hashdb_import_sector_size;

// calculate the hash for this sector-aligned hash block
hash_t hash = hash_generator::hash_buf(
sbuf.buf + offset,
hashdb_block_size);
hash_t hash = hash_generator::hash_buf(
sbuf.buf + offset,
hashdb_block_size);

// ignore empty blocks
if (hashdb_ignore_empty_blocks && is_empty_block(sbuf.buf + offset)) {
Expand Down Expand Up @@ -413,6 +466,7 @@ static void do_scan(const class scanner_params &sp,
}

// get count of blocks to process
// BRUCE --- This is a poor way to do a division...
size_t count = sbuf.bufsize / hashdb_scan_sector_size;
while ((count * hashdb_scan_sector_size) +
(hashdb_block_size - hashdb_scan_sector_size) > sbuf.pagesize) {
Expand All @@ -423,7 +477,8 @@ static void do_scan(const class scanner_params &sp,
std::vector<hash_t>* scan_input = new std::vector<hash_t>;

// allocate space on heap for the offset lookup table
std::vector<uint32_t>* offset_lookup_table = new std::vector<uint32_t>;
// BRUCE - offset_lookup_table should be a vector of size_t, not uint32_t.
std::vector<size_t>* offset_lookup_table = new std::vector<size_t>;

// get the cryptograph hash values of all the blocks along
// sector boundaries from sbuf
Expand Down Expand Up @@ -466,18 +521,23 @@ static void do_scan(const class scanner_params &sp,
// as (pos0, hash_string, count_string)

// pos0
pos0_t pos0 = sbuf.pos0 + offset_lookup_table->at(it->first);
size_t offset = offset_lookup_table->at(it->first);
pos0_t pos0 = sbuf.pos0 + offset;

// hash_string
std::string hash_string = scan_input->at(it->first).hexdigest();

// count
std::stringstream ss;
ss << it->second;
std::string count_string = ss.str();
ss << it->second; // count

// Construct an sbuf from the sector and subject it to the other tests
const sbuf_t s = sbuf_t(sbuf,offset,hashdb_block_size);
if (ramp_sector(s)) ss << " R";
if (hist_sector(s)) ss << " H";
if (whitespace_sector(s)) ss << " W";

// record the feature
identified_blocks_recorder->write(pos0, hash_string, count_string);
identified_blocks_recorder->write(pos0, hash_string, ss.str());
}

// clean up
Expand All @@ -486,15 +546,6 @@ static void do_scan(const class scanner_params &sp,
delete scan_output;
}

// detect if block is empty
inline bool is_empty_block(const uint8_t *buf) {
for (size_t i=1; i<hashdb_block_size; i++) {
if (buf[i] != buf[0]) {
return false;
}
}
return true;
}

#endif

0 comments on commit 6f800ec

Please sign in to comment.