|
2 | 2 | ;; http://www.flownet.com/ron/papers/lisp-java/ |
3 | 3 | ;; Given a list of words and a list of phone numbers, find all the ways that |
4 | 4 | ;; each phone number can be expressed as a list of words. |
| 5 | + |
5 | 6 | ;; Run: (main "word-list-file-name" "phone-number-file-name") |
6 | 7 |
|
7 | 8 | (declaim (optimize (speed 3) (debug 0) (safety 0))) |
8 | 9 | (setq *block-compile-default* t) |
9 | 10 |
|
10 | | -(declaim (inline nth-digit char->digit)) |
| 11 | +;; (declaim (inline nth-digit nth-digit-string char->digit)) |
11 | 12 |
|
12 | | -(declaim (ftype (function (string (unsigned-byte 8)) (unsigned-byte 8)) nth-digit)) |
| 13 | +(declaim (ftype (function (string (unsigned-byte 8)) (bit-vector)) nth-digit)) |
13 | 14 | (defun nth-digit (digits i) |
| 15 | + "The i-th element of a character string of digits, as a bit-vector representing 0 to 9." |
| 16 | + (ecase (char digits i) |
| 17 | + (#\0 #*0000) |
| 18 | + (#\1 #*0001) |
| 19 | + (#\2 #*0010) |
| 20 | + (#\3 #*0011) |
| 21 | + (#\4 #*0100) |
| 22 | + (#\5 #*0101) |
| 23 | + (#\6 #*0110) |
| 24 | + (#\7 #*0111) |
| 25 | + (#\8 #*1000) |
| 26 | + (#\9 #*1001))) |
| 27 | + |
| 28 | +(declaim (ftype (function (string (unsigned-byte 8)) string) nth-digit-string)) |
| 29 | +(defun nth-digit-string (digits i) |
14 | 30 | "The i-th element of a character string of digits, as an integer 0 to 9." |
15 | | - (- (char-code (char digits i)) #.(char-code #\0))) |
| 31 | + (string (char digits i))) |
| 32 | + |
| 33 | +(eval-when (:compile-toplevel) |
| 34 | + (defun mapchar (&rest chars) |
| 35 | + "Map each char in chars to its char-code and that of its upper-case." |
| 36 | + (loop for ch in chars |
| 37 | + collect (char-code ch) |
| 38 | + collect (char-code (char-upcase ch))))) |
16 | 39 |
|
17 | | -(declaim (ftype (function (base-char) (unsigned-byte 8)) char->digit)) |
18 | | -(defun char->digit (ch) |
19 | | - "Convert a character to a digit according to the phone number rules." |
20 | | - (ecase (char-downcase ch) |
21 | | - ((#\e) 0) |
22 | | - ((#\j #\n #\q) 1) |
23 | | - ((#\r #\w #\x) 2) |
24 | | - ((#\d #\s #\y) 3) |
25 | | - ((#\f #\t) 4) |
26 | | - ((#\a #\m) 5) |
27 | | - ((#\c #\i #\v) 6) |
28 | | - ((#\b #\k #\u) 7) |
29 | | - ((#\l #\o #\p) 8) |
30 | | - ((#\g #\h #\z) 9))) |
| 40 | +(declaim (ftype (function ((unsigned-byte 8)) (bit-vector)) byte->digit)) |
| 41 | +(defun byte->digit (b) |
| 42 | + "Convert a byte (alphabetic ASCII) to a bit-vector representing a digit according |
| 43 | + to the phone number rules." |
| 44 | + (ecase b |
| 45 | + (#.(mapchar #\e) #*0000) |
| 46 | + (#.(mapchar #\j #\n #\q) #*0001) |
| 47 | + (#.(mapchar #\r #\w #\x) #*0010) |
| 48 | + (#.(mapchar #\d #\s #\y) #*0011) |
| 49 | + (#.(mapchar #\f #\t) #*0100) |
| 50 | + (#.(mapchar #\a #\m) #*0101) |
| 51 | + (#.(mapchar #\c #\i #\v) #*0110) |
| 52 | + (#.(mapchar #\b #\k #\u) #*0111) |
| 53 | + (#.(mapchar #\l #\o #\p) #*1000) |
| 54 | + (#.(mapchar #\g #\h #\z) #*1001))) |
31 | 55 |
|
32 | | -(declaim (ftype (function (string) integer))) |
| 56 | +(declaim (ftype (function ((unsigned-byte 8)) boolean) alpha-byte-p)) |
| 57 | +(defun alpha-byte-p (b) |
| 58 | + (or |
| 59 | + (and (>= b #.(char-code #\a)) (<= b #.(char-code #\z))) |
| 60 | + (and (>= b #.(char-code #\A)) (<= b #.(char-code #\Z))))) |
| 61 | + |
| 62 | +(declaim (ftype (function (string) boolean) digitp)) |
| 63 | +(defun digitp (s) |
| 64 | + (if (and (eq 1 (length s)) |
| 65 | + (let ((ch (elt s 0))) (digit-char-p ch))) |
| 66 | + t |
| 67 | + nil)) |
| 68 | + |
| 69 | +(declaim (ftype (function (string) (bit-vector)))) |
33 | 70 | (defun word->number (word) |
34 | 71 | "Translate a word (string) into a phone number, according to the rules." |
35 | | - (let ((n 1)) ; leading zero problem |
36 | | - (declare (type integer n)) |
37 | | - (loop for i from 0 below (length word) |
38 | | - for ch = (char word i) do |
39 | | - (when (alpha-char-p ch) (setf n (+ (* 10 n) (char->digit ch))))) |
| 72 | + (let ((n (make-sequence 'bit-vector #.(* 25 8) :initial-element 1)) |
| 73 | + (word-bytes (string-to-octets word))) |
| 74 | + (loop for i from 0 below (length word-bytes) |
| 75 | + for b = (elt word-bytes i) |
| 76 | + with byte-start = 0 |
| 77 | + with byte-end = 4 |
| 78 | + when (alpha-byte-p b) do |
| 79 | + (setf (subseq n byte-start byte-end) (byte->digit b) |
| 80 | + byte-start byte-end |
| 81 | + byte-end (+ byte-end 4))) |
40 | 82 | n)) |
41 | 83 |
|
42 | 84 | (defglobal *dict* nil |
|
51 | 93 | of words found for (subseq DIGITS 0 START). So if START gets to the end of |
52 | 94 | DIGITS, then we have a solution in WORDS. Otherwise, for every prefix of |
53 | 95 | DIGITS, look in the dictionary for word(s) that map to the value of the |
54 | | - prefix (computed incrementally as N), and for each such word try to extend |
| 96 | + prefix (computed incrementally as KEY), and for each such word try to extend |
55 | 97 | the solution with a recursive call. There are two complications: (1) the |
56 | 98 | rules say that in addition to dictionary words, you can use a single |
57 | 99 | digit in the output, but not two digits in a row. Also (and this seems |
|
60 | 102 | and the most recent word is not a digit, try a recursive call that pushes a |
61 | 103 | digit. (2) The other complication is that the obvious way of mapping |
62 | 104 | strings to integers would map R to 2 and ER to 02, which of course is |
63 | | - the same integer as 2. Therefore we prepend a 1 to every number, and R |
64 | | - becomes 12 and ER becomes 102." |
| 105 | + the same integer as 2. |
| 106 | + Instead of using integers, we use BIT-VECTOR because that allows us to |
| 107 | + efficiently compute and compare them." |
65 | 108 | (if (>= start (length digits)) |
66 | 109 | (format t "~a:~{ ~a~}~%" num (reverse words)) |
67 | 110 | (let ((found-word nil) |
68 | | - (n 1)) ; leading zero problem |
69 | | - (loop for i from start below (length digits) do |
70 | | - (setf n (+ (* 10 n) (nth-digit digits i))) |
71 | | - (loop for word in (gethash n *dict*) do |
72 | | - (setf found-word t) |
73 | | - (print-translations num digits (+ 1 i) (cons word words)))) |
74 | | - (when (and (not found-word) (not (numberp (first words)))) |
75 | | - (print-translations num digits (+ start 1) |
76 | | - (cons (nth-digit digits start) words)))))) |
| 111 | + (key (make-sequence 'bit-vector #.(* 25 8) :initial-element 1))) |
| 112 | + (loop for i from start below (length digits) |
| 113 | + for key-start from 0 by 4 and key-end from 4 by 4 do |
| 114 | + (setf (subseq key key-start key-end) (nth-digit digits i)) |
| 115 | + (loop for word in (gethash key *dict*) do |
| 116 | + (setf found-word t) |
| 117 | + (print-translations num digits (+ 1 i) (cons word words)))) |
| 118 | + (when (and (not found-word) (not (digitp (first words)))) |
| 119 | + (print-translations num digits (+ start 1) |
| 120 | + (cons (nth-digit-string digits start) words)))))) |
77 | 121 |
|
78 | 122 | (defun load-dictionary (file size) |
79 | 123 | "Create a hashtable from the file of words (one per line). Takes a hint |
80 | 124 | for the initial hashtable size. Each key is the phone number for a word; |
81 | 125 | each value is a list of words with that phone number." |
82 | | - (let ((table (make-hash-table :test #'eql :size size))) |
| 126 | + (let ((table (make-hash-table :test #'equal :size size))) |
83 | 127 | (with-open-file (in file) |
84 | 128 | (loop for word = (read-line in nil) while word do |
85 | 129 | (push word (gethash (word->number word) table)))) |
86 | 130 | table)) |
87 | 131 |
|
88 | | - |
89 | 132 | (defun main (&optional (dict "tests/words.txt") (nums "tests/numbers.txt") (dict-size 100)) |
90 | 133 | "Read the input file ¨DICT and load it into *dict*. Then for each line in |
91 | 134 | NUMS, print all the translations of the number into a sequence of words, |
|
0 commit comments