/
assembler.clj
253 lines (230 loc) · 9.7 KB
/
assembler.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
(ns lithium.assembler
(:require [clojure.string :as string])
(:use [clojure.java.shell :only [sh]]))
(defmacro deftable [name headers & data]
`(def ~name
(into {}
(for [~(vec headers) ~(vec (map vec (partition (count headers) data)))]
{~(first headers) (zipmap ~(vec (map keyword (rest headers))) ~(vec (rest headers)))}))))
(deftable +registers+
[reg size value type]
:ax 16 0 :general
:bx 16 3 :general
:cx 16 1 :general
:dx 16 2 :general
:sp 16 4 :general
:bp 16 5 :general
:si 16 6 :general
:di 16 7 :general
:cs 16 1 :segment
:ds 16 3 :segment
:es 16 0 :segment
:ss 16 2 :segment
:al 8 0 :general
:ah 8 4 :general
:bl 8 3 :general
:bh 8 7 :general
:cl 8 1 :general
:ch 8 5 :general
:dl 8 2 :general
:dh 8 6 :general)
(def +condition-codes+
{:o 0 :no 1 :b 2 :c 2 :nae 2 :ae 3 :nb 3 :nc 3 :e 4 :z 4 :ne 5 :nz 5 :be 6 :na 6 :a 7 :nbe 7
:s 8 :ns 9 :p 10 :pe 10 :np 11 :po 11 :l 12 :nge 12 :ge 13 :nl 13 :le 14 :ng 14 :g 15 :nle 15})
(def +memory-widths+ #{:byte :word})
(defn modrm
[mod spare rm]
(+ (bit-shift-left mod 6)
(bit-shift-left spare 3)
rm))
(defn reg8 [x] (let [info (+registers+ x)] (and info (= (:type info) :general) (= (:size info) 8))))
(defn reg16 [x] (let [info (+registers+ x)] (and info (= (:type info) :general) (= (:size info) 16))))
(defn sreg [x] (let [info (+registers+ x)] (and info (= (:type info) :segment))))
(defn imm8 [x] (and (integer? x) (<= 0 x 255)))
(defn imm16 [x] (or (and (integer? x) (<= 0 x 65535)) (keyword? x)))
(defn mem [x] (vector? x))
(defn width [x] (first (filter +memory-widths+ x)))
(defn mem8 [x] (and (mem x) (let [w (width x)] (or (nil? w) (= w :byte)))))
(defn mem16 [x] (and (mem x) (let [w (width x)] (or (nil? w) (= w :word)))))
(defn rm8 [x] (or (reg8 x) (mem8 x)))
(defn rm16 [x] (or (reg16 x) (mem16 x)))
(defn label [x] (keyword? x))
(def assembly-table
[[:mov rm8 reg8] [0x88 :r]
[:mov rm16 reg16] [0x89 :r]
[:mov reg8 rm8] [0x8a :r]
[:mov reg16 rm16] [0x8b :r]
[:mov reg8 imm8] [[:r+ 0xb0] :ib]
[:mov reg16 imm16] [[:r+ 0xb8] :iw]
[:mov sreg rm16] [0x8e :r]
[:mov rm8 imm8] [0xc6 :0 :ib]
[:mov rm16 imm16] [0xc7 :0 :iw]
[:xor rm8 reg8] [0x30 :r]
[:xor rm16 reg16] [0x31 :r]
[:push reg16] [[:r+ 0x50]]
[:pop reg16] [[:r+ 0x58]]
[:push :cs] [0x0e]
[:pushf] [0x9c]
[:pusha] [0x60]
[:stosb] [0xaa]
[:stosw] [0xab]
[:movsw] [0xa5]
[:rol rm16 imm8] [0xc1 :0 :ib]
[:daa] [0x27]
[:ret] [0xc3]
[:cli] [0xfa]
[:inc reg16] [[:r+ 0x40]]
[:inc rm8] [0xfe :0]
[:dec reg16] [[:r+ 0x48]]
[:dec rm8] [0xfe :1]
[:cmp :al imm8] [0x3c :ib]
[:cmp :ax imm16] [0x3d :iw]
[:cmp rm8 imm8] [0x80 :7 :ib]
[:cmp rm16 imm16] [0x81 :7 :iw]
[:cmp reg8 rm8] [0x3a :r]
[:cmp reg16 rm16] [0x3b :r]
[:adc :al imm8] [0x14 :ib]
[:add rm8 reg8] [0x00 :r]
[:add rm16 reg16] [0x01 :r]
[:add reg8 rm8] [0x02 :r]
[:add reg16 rm16] [0x03 :r]
[:add :al imm8] [0x04 :ib]
[:add :ax imm16] [0x05 :iw]
[:add rm8 imm8] [0x80 :0 :ib]
[:add rm16 imm16] [0x81 :0 :iw]
[:sub rm8 imm8] [0x80 :5 :ib]
[:sub rm16 imm8] [0x83 :5 :ib]
[:sub rm16 imm16] [0x81 :5 :iw]
[:sub reg8 rm8] [0x2a :r]
[:sub reg16 rm16] [0x2b :r]
[:and rm8 imm8] [0x80 :4 :ib]
[:and rm16 imm8] [0x83 :4 :ib]
[:and rm16 imm16] [0x81 :4 :iw]
[:mul rm16] [0xf7 :4]
[:mul rm8] [0xf6 :4]
[:div rm16] [0xf7 :6]
[:div rm8] [0xf6 :6]
[:sal rm8 1] [0xd0 :4]
[:sal rm8 imm8] [0xc0 :4 :ib]
[:sal rm16 1] [0xd1 :4]
[:sal rm16 imm8] [0xc1 :4 :ib]
[:sar rm8 1] [0xd0 :7]
[:sar rm8 imm8] [0xc0 :7 :ib]
[:sar rm16 1] [0xd1 :7]
[:sar rm16 imm8] [0xc1 :7 :ib]
[:stosb] [0xaa]
[:or rm8 imm8] [0x80 :1 :ib]
[:or rm16 imm16] [0x81 :1 :iw]
[:jCC label] [[:cc+ 0x70] :rb]
[:setCC rm8] [0x0f [:cc+ 0x90] :2]
[:loop label] [0xe2 :rb]
[:jmp rm16] [0xff :4]
[:jmp label] [0xe9 :rw]
[:call rm16] [0xff :2]
[:call label] [0xe8 :rw]
[:int 3] [0xcc]
[:int imm8] [0xcd :ib]])
(defn extract-cc [instr template]
(let [re (re-pattern (string/replace (name template) "CC" "(.+)"))]
(when-let [cc-s (second (re-find re (name instr)))]
(keyword cc-s))))
(defn part-of-spec-matches? [datum template]
(if (fn? template) (template datum) (= datum template)))
(defn instruction-matches? [instr [template _]]
(let [f1 (first instr)
f2 (first template)]
(and (or (= (name f1) (name f2))
((set (keys +condition-codes+)) (extract-cc f1 f2)))
(= (count instr) (count template))
(reduce #(and %1 %2) true (map part-of-spec-matches? (rest instr) (rest template))))))
(defn find-template [instr]
(first (filter (partial instruction-matches? instr)
(partition 2 assembly-table))))
(defn make-label [label width]
(keyword (or (namespace label) (name width)) (name label)))
(defn label?
([x] (keyword? x))
([x type] (and (keyword? x) (= (namespace x) (name type)))))
(defn word-to-bytes [[size w]]
(let [w (if (and (integer? w) (neg? w)) (+ w (bit-shift-left 1 size)) w)]
(condp = size
0 []
8 [w]
16 (if (keyword? w) [:placeholder (make-label w :abs)] [(bit-and w 0xff) (bit-shift-right w 8)]))))
(defn lenient-parse-int [x]
(try
(Integer/parseInt x)
(catch NumberFormatException _ nil)))
(defn make-modrm [rm-desc spare]
(if (keyword? rm-desc)
[(modrm 3 spare (-> rm-desc +registers+ :value))]
(let [rm-desc (remove +memory-widths+ rm-desc)
registers (vec (sort-by name (filter keyword? rm-desc)))
displacement (reduce + 0 (filter integer? rm-desc))
rm-map {[:bx :si] 0 [:bx :di] 1 [:bp :si] 2 [:bp :di] 3 [:si] 4 [:di] 5 [:bp] 6 [] 6 [:bx] 7}
mod (cond
(or (and (zero? displacement) (not= registers [:bp])) (empty? registers)) 0
(or (and (zero? displacement) (= registers [:bp])) (<= -128 displacement 127)) 1
(<= -32768 displacement 32767) 2)
rm (rm-map registers)]
(when-not rm
(throw (Exception. (format "Incorrect memory reference: %s" rm-desc))))
(into [(modrm mod spare rm)] (word-to-bytes [(* 8 (if (empty? registers) 2 mod)) displacement])))))
(defn parse-byte [[instr op1 op2] [instr-template op1-template op2-template] byte-desc]
(let [imm (cond (#{imm8 imm16} op1-template) op1 (#{imm8 imm16} op2-template) op2)
rm (cond (#{rm8 rm16} op1-template) op1 (#{rm8 rm16} op2-template) op2)
not-rm (if (= rm op1) op2 op1)]
(cond
(integer? byte-desc) [byte-desc]
(= byte-desc :ib) (word-to-bytes [8 imm])
(= byte-desc :iw) (word-to-bytes [16 imm])
(= byte-desc :rb) [(make-label op1 :byte)]
(= byte-desc :rw) [:placeholder (make-label op1 :word)]
(and (keyword? byte-desc) (lenient-parse-int (name byte-desc)))
(make-modrm rm (lenient-parse-int (name byte-desc)))
(= byte-desc :r)
(make-modrm rm (-> not-rm +registers+ :value))
(and (sequential? byte-desc) (= (first byte-desc) :r+))
[(+ (second byte-desc) (-> op1 +registers+ :value))]
(and (sequential? byte-desc) (= (first byte-desc) :cc+))
[(+ (second byte-desc) (-> instr (extract-cc instr-template) +condition-codes+))])))
(defn assemble-instruction [instr]
(cond
(= (first instr) 'string) (map int (second instr))
(= (first instr) 'bytes) (second instr)
:otherwise
(let [[template parts] (find-template instr)]
(when-not template (throw (Exception. (str "Could not assemble instruction: " (pr-str instr)))))
(let [assembled-parts (map (partial parse-byte instr template) parts)]
(apply concat assembled-parts)))))
;; This is the value added to absolute addresses of labels, telling
;; the assembler where the code starts from. Defaults to 0x100 for
;; compatibility with COM format.
(def ^:dynamic *origin* 0x100)
(defn resolve-labels [code labels]
(loop [result [] code code pos 0]
(if-let [fb (first code)]
(recur
(cond (= fb :placeholder) result
(label? fb :byte) (into result (word-to-bytes [8 (dec (- (-> fb name keyword labels) pos))]))
(label? fb :word) (into result (word-to-bytes [16 (dec (- (-> fb name keyword labels) pos))]))
(label? fb :abs) (into result (word-to-bytes [16 (+ *origin* (-> fb name keyword labels))]))
:otherwise (conj result fb))
(next code) (inc pos))
result)))
(defn strip-comments [prog]
(remove #(and (vector? %) (= (first %) 'comment)) prog))
(defn asm [prog]
(loop [prog (strip-comments prog) code [] pc 0 labels {}]
(if-not (seq prog)
(resolve-labels code labels)
(let [ins (first prog)]
(if (keyword? ins)
(recur (next prog) code pc (assoc labels ins pc))
(let [assembled (assemble-instruction ins)
cnt (count assembled)]
(recur (next prog) (into code assembled) (+ pc cnt) labels)))))))
(defn assemble [prog]
(asm (if (string? prog)
(read-string (str "[" (slurp prog) "]"))
prog)))