Skip to content

Commit

Permalink
Add runtime integrity self-testing of wordlists
Browse files Browse the repository at this point in the history
The code now self-tests the SHA-256 hashes of the wordlists with which
it was compiled.  I am a bit nervous about build-time bugs in platform
shell tools which might behave differently in different locales, etc.
Will *your* platform's shell tools process UTF-8 strings without
mangling?  Now, you can be sure with `make check`.

This test currently runs by default.  It is slow, so I will probably
run it only when the undocumented -T flag is given.  I do think the
proportion of test code to feature code is growing appropriately, for a
utility which pertains to Other People's Money.

I also added code to reproduce a wordlist on stdout, in a format
identical to the source file.  However, that function cannot yet be used
due to lack of language selection UI.

The language selection UI is next!
  • Loading branch information
nym-zone committed Dec 31, 2017
1 parent 5b6a666 commit 8bfefc6
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 5 deletions.
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ LDADD=-lmd

SEDRE=-E

HASHPROG=sha256 -q

.include "Makefile.inc"

.include <bsd.prog.mk>
6 changes: 5 additions & 1 deletion Makefile.inc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ $(PROG).$(MANSEC).txt: $(PROG).$(MANSEC)

wordlist.h: wordlist/*
for lang in $(WORDLISTS) ; do \
echo "const char *$${lang}[2048] = {" ; \
echo "static const char *$${lang}[2048] = {" ; \
{ \
ctr=2048 ; \
while read w ; do \
Expand All @@ -42,6 +42,10 @@ wordlist.h: wordlist/*
printf '\tu8"%s",\n' "$$w" ; \
fi ; \
done < wordlist/$${lang}.txt ; \
printf 'static const char %s_hash[] = "%s";\n' \
$${lang} \
`$(HASHPROG) wordlist/$${lang}.txt | \
grep -Eo '^[0-9a-f]+'` ; \
} ; \
done > $@

Expand Down
2 changes: 2 additions & 0 deletions Makefile.linux
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ SEDRE=-r

all: easyseed

HASHPROG=sha256sum

include Makefile.inc

easyseed: wordlist.h $(OBJS)
Expand Down
80 changes: 76 additions & 4 deletions easyseed.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,15 @@ struct wordlist {
const char *code2;
const char *space;
const char **wordlist;
const char *hash; /* SHA-256 */
};

static const char ascii_space[] = " ";

#include "wordlist.h"

#define LANG(name, lname, code2, space) \
{ #name, lname, code2, space, name }
{ #name, lname, code2, space, name, name##_hash }

/*
* XXX: BUG: zh-TW and zh-CN are inaccurate descriptors. HK Chinese use
Expand Down Expand Up @@ -296,10 +297,76 @@ selftest(int T_flag)
errors, ntests);
abort();
}
if (T_flag) {
if (T_flag)
fprintf(f, "%u/%u self-tests succeeded.\n", ntests, ntests);
exit(0);
}

/*
 * Build-system sanity aid.  Some platform's shell tools might mangle
 * UTF-8 while wordlist.h is being generated; dumping a compiled-in
 * wordlist lets it be compared by hand against the source wordlist.
 *
 * wl: the wordlist to dump.  Its compile-time hash and "<name>.txt" are
 * written to stderr; its 2048 words go to stdout, one per line.
 */
static void
reproduce_wordlist(const struct wordlist *wl)
{
	const char **word = wl->wordlist;
	const char **const end = word + 2048;

	fprintf(stderr, "%s %s.txt\n", wl->hash, wl->name);

	while (word != end) {
		printf("%s\n", *word);
		++word;
	}
}

/*
 * Self-test the compiled-in wordlists against their build-time hashes.
 *
 * For every entry of wordlists[], the SHA-256 of its 2048 words (each
 * followed by a newline) is computed, rendered as lowercase hex, and
 * compared with the hash string stored in that entry.
 *
 * T_flag: if nonzero, failures are reported on stdout and a
 * "<hash> <name>.txt" line is printed for each list that verifies;
 * otherwise only failures are reported, on stderr.
 *
 * Every list is checked before the verdict: if any list failed, the
 * function calls abort() and does not return.
 */
static void
selftest_wordlists(int T_flag)
{
	static const char hex[] = "0123456789abcdef";
	const size_t nlists = sizeof(wordlists) / sizeof(*wordlists);
	char txthash[65], *cur;
	unsigned char buf[32];
	SHA256_CTX ctx;
	unsigned errors = 0;
	FILE *f;

	f = T_flag ? stdout : stderr;

	for (size_t i = 0; i < nlists; ++i) {
		SHA256_Init(&ctx);
		for (size_t j = 0; j < 2048; ++j) {
			const char *word = wordlists[i].wordlist[j];

			SHA256_Update(&ctx, word, strlen(word));
			/* XXX: Horrid inefficiency. */
			SHA256_Update(&ctx, "\n", 1);
		}
		SHA256_Final(buf, &ctx);

		/*
		 * Hex-encode the digest.  The index is deliberately not
		 * named "i", so that it cannot shadow the list index.
		 */
		cur = txthash;
		for (size_t k = 0; k < sizeof(buf); ++k) {
			*cur++ = hex[buf[k] >> 4];
			*cur++ = hex[buf[k] & 0xf];
		}
		*cur = '\0';

		/*
		 * Compare whole strings: a compile-time hash that carries
		 * anything beyond the 64 hex digits is itself evidence of
		 * a mangled build and must not pass.
		 */
		if (strcmp(wordlists[i].hash, txthash) != 0) {
			fprintf(f, "Hash failure for wordlist \"%s.txt\". "
			    "Compile-time hash:\n%s\n"
			    "Auto-checked hash:\n%s\n",
			    wordlists[i].name, wordlists[i].hash, txthash);
			++errors;
		} else if (T_flag)
			printf("%s %s.txt\n", txthash, wordlists[i].name);
	}

	if (errors)
		abort();
}

/*
 * Print a header line followed by one line per compiled-in wordlist,
 * giving its selector name, its long name, and its two-letter code,
 * all on stdout.  Then exit with status 1; this function does not
 * return.
 */
static void
printlang(void)
{
	const size_t count = sizeof(wordlists) / sizeof(*wordlists);
	size_t idx = 0;

	fputs("# Available wordlists and selectors:\n", stdout);

	while (idx < count) {
		printf("\t%s: \"%s\" (%s)\n",
		    wordlists[idx].name,
		    wordlists[idx].lname,
		    wordlists[idx].code2);
		++idx;
	}

	exit(1);
}

int
Expand All @@ -315,7 +382,7 @@ main(int argc, char *argv[])
size_t len;

opterr = 0;
while ((ch = getopt(argc, argv, ":b:k:OT")) > -1) {
while ((ch = getopt(argc, argv, ":LOTb:k:")) > -1) {
switch (ch) {
case 'b': /* bits */
/* XXX: atoi(), hahah */
Expand All @@ -324,6 +391,8 @@ main(int argc, char *argv[])
case 'k':
keymat = optarg;
break;
case 'L':
printlang();
case 'O':
O_flag = 1;
break;
Expand Down Expand Up @@ -351,6 +420,9 @@ main(int argc, char *argv[])
err(2, "open() on /dev/null");

selftest(T_flag);
selftest_wordlists(T_flag);
if (T_flag)
return (0);

nbytes = nbits/8;

Expand Down

0 comments on commit 8bfefc6

Please sign in to comment.