/
start.asm
561 lines (503 loc) · 11.7 KB
/
start.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
; "Tifflin" Kernel
; - By John Hodge (thePowersGang)
;
; arch/amd64/start.asm
; - AMD64/IA-32e boot shim
%include "Core/arch/amd64/common.inc.asm" ; WTF Nasm
[extern low_InitialPML4]
[section .multiboot]
; Multiboot header, emitted into its own section.
; Layout: magic, flags, checksum, the five address fields, then the
; requested video mode (type, width, height, depth).
[global mboot]
mboot:
; Header flag bits (combined into MULTIBOOT_HEADER_FLAGS below)
%define MULTIBOOT_PAGE_ALIGN 1<<0
%define MULTIBOOT_MEMORY_INFO 1<<1
%define MULTIBOOT_REQVIDMODE 1<<2
%define MULTIBOOT_HEADER_MAGIC 0x1BADB002
%define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_PAGE_ALIGN | MULTIBOOT_MEMORY_INFO | MULTIBOOT_REQVIDMODE)
; Negated sum, so that magic + flags + checksum wraps to zero in 32 bits
%define MULTIBOOT_CHECKSUM -(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
; This is the GRUB Multiboot header: the boot signature the loader looks for
dd MULTIBOOT_HEADER_MAGIC
dd MULTIBOOT_HEADER_FLAGS
dd MULTIBOOT_CHECKSUM
dd mboot ; header_addr
; Remaining a.out kludge fields (not used)
dd 0 ; load_addr
dd 0 ; load_end_addr
dd 0 ; bss_end_addr
dd 0 ; entry_addr
; Video mode
dd 0 ; Mode type (0: LFB)
;dd 0,0 ; Width, Height (no preference)
;dd 1600,900 ; Width, Height ('HD+')
dd 1366,768 ; Width, Height (1366x768)
;dd 1024,768 ; Width, Height ('XGA')
dd 32 ; Depth (32-bit preferred)
[section .inittext]
[BITS 32]
[global start]
; start - kernel entry point, assembled as 32-bit code.
; In:  EAX = multiboot signature, EBX = multiboot info pointer
;      (or ECX = 0x71FF0EF1, in which case control goes to start_uefi)
; Verifies long mode support, enables the CR4/EFER/CR0 features listed below
; with low_InitialPML4 as the page table root, loads the GDT and far-jumps to
; start64. Progress characters are written to I/O port 0x3F8 on the way.
; All data references subtract KERNEL_BASE from the symbol address.
start:
; NOTE: If this passes, it's being run in 64-bit mode
cmp ecx, 0x71FF0EF1
jz start_uefi
; 0. Save multiboot state
mov [s_multiboot_signature - KERNEL_BASE], eax
; - Set the top bit of the pointer; together with the preset 0xFFFFFFFF high
;   dword of s_multiboot_pointer this yields a 0xFFFFFFFF8xxxxxxx address
or ebx, 0x80000000
mov [s_multiboot_pointer - KERNEL_BASE], ebx
; 1. Ensure that CPU is compatible
mov eax, 0x80000000
cpuid
cmp eax, 0x80000001 ; Highest extended CPUID leaf must reach 0x80000001
jb not64bitCapable
mov eax, 0x80000001
cpuid
test edx, 1<<29 ; EDX bit 29 = long mode available
jz not64bitCapable
mov dx, 0x3F8 ; Prepare for serial debug
mov al, 0x10 ; NOTE(review): raw byte 0x10 is sent before "OK" - purpose not evident here
out dx, al
mov al, 'O'
out dx, al
mov al, 'K'
out dx, al
;; 2. Switch into IA-32e mode
; Enable (CR4 bits):
; [4] PSE (Page Size Extensions)
; + [5] PAE (Physical Address Extension)
; + [7] PGE (Page Global Enable)
; + [ 9] OSFXSR (Operating System Support for FXSAVE and FXRSTOR instructions)
; + [10] OSXMMEXCPT (Operating System Support for Unmasked SIMD Floating-Point Exceptions)
mov eax, cr4
or eax, 0x80|0x20|0x10
or ax, (1 << 9)|(1 << 10)
mov cr4, eax
mov al, '4'
out dx, al
; Load PML4
mov eax, low_InitialPML4
mov cr3, eax
mov al, '3'
out dx, al
; Enable IA-32e mode
; (Also enables SYSCALL and NX)
mov ecx, 0xC0000080 ; EFER MSR
rdmsr
or eax, (1 << 11)|(1 << 8)|(1 << 0) ; NXE, LME, SCE
wrmsr
mov dx, 0x3F8 ; RDMSR overwrote EDX, so reload the port number
mov al, 'e'
out dx, al
; 3. Enable paging and enter long mode (enable SSE too)
; Set [31] PG (Paging enabled)
; Set [16] WP (Kernel write-protect)
; Set [3]TS (Enables #NM on all FPU instructions)
; Set [1]MP (with TS, Enables #NM when FWAIT is used)
; Clear [2]EM (Disables emulation of the FPU)
mov eax, cr0
or eax, 0x80010000|(1 << 3)|(1 << 1) ; PG & WP, plus TS & MP
and ax, ~(1 << 2) ; Clear EM
mov cr0, eax
; Load the GDT through its low (KERNEL_BASE-relative) alias, then reload CS
; with selector 0x08 (the 64-bit code descriptor) to start running 64-bit code
lgdt [GDTPtr - KERNEL_BASE]
jmp 0x08:start64
;;
;;
;;
; not64bitCapable - report that the CPU lacks long mode, then halt forever.
; Each character of strNot64BitCapable is sent to I/O port 0x3F8 and also
; stored (with attribute byte 0x0F) as a 16-bit cell starting at 0xB8000.
not64bitCapable:
    mov esi, strNot64BitCapable     ; ESI = next character to read
    mov edi, 0xB8000                ; EDI = next text cell to write
    mov dx, 0x3F8
    mov ah, 0x0F                    ; attribute byte stored alongside every character
    jmp .fetch
.emit:
    out dx, al
    stosw                           ; AX = attribute:character
.fetch:
    lodsb
    test al, al
    jnz .emit                       ; keep going until the NUL terminator
.halt:
    cli
    hlt
    jmp .halt                       ; HLT can still be left (e.g. by an NMI), so loop
[BITS 64]
; start_uefi - alternate entry path, assembled as 64-bit code.
; Reached from `start` when ECX holds 0x71FF0EF1.
; In:  ECX = signature value (stored as the multiboot signature)
;      EDX = boot information pointer (low 32 bits)
; Applies the same CR4/EFER/CR0 settings as the 32-bit path, switches CR3 to
; low_InitialPML4, loads the GDT and far-jumps to start64.
start_uefi:
mov [s_multiboot_signature - KERNEL_BASE], ecx
; Store the pointer as 0xFFFFFFFF:(EDX | 0x80000000), i.e. a 0xFFFFFFFF8xxxxxxx address
or edx, 0x80000000
mov [s_multiboot_pointer - KERNEL_BASE], edx
mov [s_multiboot_pointer - KERNEL_BASE + 4], DWORD 0xFFFFFFFF
;; 1. Enable a nice set of features
; Enable (CR4 bits):
; [4] PSE (Page Size Extensions)
; + [5] PAE (Physical Address Extension)
; + [7] PGE (Page Global Enable)
; + [ 9] OSFXSR (Operating System Support for FXSAVE and FXRSTOR instructions)
; + [10] OSXMMEXCPT (Operating System Support for Unmasked SIMD Floating-Point Exceptions)
mov rax, cr4
or eax, 0x80|0x20|0x10
or ax, (1 << 9)|(1 << 10)
mov cr4, rax
; Enable IA-32e mode
; (Also enables SYSCALL and NX)
mov ecx, 0xC0000080 ; EFER MSR
rdmsr
or eax, (1 << 11)|(1 << 8)|(1 << 0) ; NXE, LME, SCE
wrmsr
; Load PML4
mov eax, low_InitialPML4
mov cr3, rax
; 2. Enable paging and enter long mode (enable SSE too)
; Set [31] PG (Paging enabled)
; Set [16] WP (Kernel write-protect)
; Set [3]TS (Enables #NM on all FPU instructions)
; Set [1]MP (with TS, Enables #NM when FWAIT is used)
; Clear [2]EM (Disables emulation of the FPU)
mov rax, cr0
or eax, 0x80010000|(1 << 3)|(1 << 1) ; PG & WP, plus TS & MP
and ax, ~(1 << 2) ; Clear EM
mov cr0, rax
lgdt [DWORD GDTPtr - KERNEL_BASE]
; - Far jump (indirect)
; NOTE(review): the pointer below is a 64-bit offset followed by a 16-bit
; selector; confirm the assembler emits the 64-bit-operand form of this jump
; (and that the target CPUs accept it) so that all ten bytes are read.
jmp far [.ljmp_addr]
[section .initdata]
; Far pointer used by the jump above: offset of start64, then selector 0x08
.ljmp_addr:
dq start64
dw 0x08
[section .inittext]
[extern prep_tls]
; start64 - target of the far jumps in start and start_uefi (selector 0x08).
; Writes '6' to I/O port 0x3F8 as a progress marker, then jumps to
; start64_higher through a register so that a full 64-bit absolute address
; can be used (presumably because .text is linked at the high kernel address
; - confirm against the linker script).
; DX is left holding 0x3F8 for start64_higher.
start64:
mov dx, 0x3F8
mov al, '6'
out dx, al
mov rax, start64_higher
jmp rax
[section .initdata]
; NUL-terminated message printed by not64bitCapable
strNot64BitCapable:
db "ERROR: CPU doesn't support 64-bit operation",0
[section .text]
[extern kmain]
; start64_higher - main 64-bit initialisation, entered from start64 with
; DX = 0x3F8. Reloads the GDT via GDTPtr2, loads the data segment registers,
; prepares the initial thread's TLS and stack via prep_tls, programs the
; FS/GS base and SYSCALL MSRs, clears the first entry of InitialPML4 and
; calls kmain. If kmain returns, the CPU is halted in a loop.
start64_higher:
mov al, 'H'
out dx, al
; 4. Set true GDT base
; - GDTPtr2 holds the GDT's full 64-bit address (dq GDT)
lgdt [a32 DWORD GDTPtr2 - KERNEL_BASE]
; Load segment regs (0x10 = kernel data descriptor)
mov ax, 0x10
mov ds, ax
mov ss, ax
mov es, ax
mov fs, ax
mov gs, ax
; 5. Initialise TLS for TID0
; - Use a temp stack for the following function
mov rsp, KSTACK_BASE+0x1000+1024
mov rax, KSTACK_BASE+0x1000
; NOTE(review): stores the stack bottom at offset 14*8 above the temporary
; stack pointer; presumably consumed by prep_tls or stack checks - confirm
mov [rsp+14*8], rax
; - Pass the stack top, bottom, and TID0 pointer (null)
mov rdi, KSTACK_BASE+INITIAL_KSTACK_SIZE*0x1000
mov rsi, KSTACK_BASE+0x1000
mov rdx, 0
; - Prepare the TLS region
call prep_tls
; Switch to the real stack
; - The value returned in RAX is recorded as TID0's TLS base and also
;   becomes the new stack pointer
mov [rel s_tid0_tls_base], rax
mov rsp, rax
; 6. Set up FS/GS base for kernel
; - WRMSR takes the value in EDX:EAX; both MSRs get the same address
mov rax, rsp
mov rdx, rax
shr rdx, 32
mov ecx, 0xC0000100 ; FS Base
wrmsr
mov ecx, 0xC0000101 ; GS Base
wrmsr
; 7. Request setup of IRQ handlers
; - idt_init is not defined in this part of the file (presumably it comes
;   from interrupts.inc.asm)
call idt_init
; - Emit a newline on the serial port to finish the progress line
mov dx, 0x3F8
mov al, 10
out dx, al
; Bind the 'SYSCALL' handler (and set flags for it)
; LSTAR = 0xC000_0082
mov rax, syscall_handler
mov rdx, rax
shr rdx, 32
mov ecx, 0xC0000082
wrmsr
; STAR = 0xC000_0081
mov eax, 0x00000000
mov edx, 0x001B0008 ; [63:48] User CS, [47:32] Kernel CS
mov ecx, 0xC0000081
wrmsr
; FMASK = 0xC000_0084
mov eax, 0x200 ; - Clear IF on SYSCALL
mov edx, 0
mov ecx, 0xC0000084
wrmsr
; Clear entry 0 of the PML4 (the low identity mapping)
mov rax, InitialPML4
mov QWORD [rax], 0
; 8. Call rust kmain
call kmain
; kmain is not expected to return; halt forever if it does
.dead_loop:
cli
hlt
jmp .dead_loop
%include "Core/arch/amd64/interrupts.inc.asm"
; task_switch - save the current kernel context and resume another one.
; Pushes RBP and the registers named in SAVE, stores RSP through RDI, loads
; the new RSP and CR3, updates the TSS and GS base, then restores the same
; register set from the new stack and returns on it.
; RDI: Save location for RSP
; RSI: New RSP (pointer)
; RDX: New TLS base / kernel stack top (written to TSS RSP0 and the GS base MSR)
; RCX: New CR3
[section .text.asm.task_switch]
EXPORT task_switch
push rbp
mov rbp, rsp
SAVE rbx, r12, r13, r14, r15
; Perform context save/restore
mov [rdi], rsp ; Save RSP
mov rsp, [rsi] ; New RSP
mov cr3, rcx ; New CR3
invlpg [rsp] ; Invalidate the TLB entry for the page holding the new stack top
; Update stack top (RSP0) and TLS base (GS)
; TLS base and stack top are the same address.
; TODO: This assumes uniprocessor
mov [rel TSSes+tss.rsp0], rdx
mov rax, rdx ; EAX = Low
shr rdx, 32 ; EDX = High
mov ecx, 0xC0000101 ; GS Base
wrmsr
; Restore saved registers and return
RESTORE rbx, r12, r13, r14, r15
; - For debugging, reset RBP and trigger a tracepoint
;mov rbp, rsp
;int3
pop rbp
ret
[section .text]
; thread_trampoline - entered with the thread root method's address on top of
; the stack and the object it operates on directly above it (presumably laid
; out by the thread creation code - confirm against the Rust side).
EXPORT thread_trampoline
pop rax ; 1. Pop thread root method off stack
mov rdi, rsp ; 2. Set RDI to the object to call (now at the top of the stack)
jmp rax ; 3. Jump to the thread root method, which should never return
; drop_to_user - leave the kernel and start running user code via SYSRET.
; Does not return to the caller.
; RDI: IP
; RSI: SP
; RDX: Arg
EXPORT drop_to_user
mov rcx, rdi ; Set IP for SYSRET
; Capture the current RFLAGS before masking interrupts, so the value handed
; to SYSRET still has IF as it was on entry
pushf
cli
pop r11 ; Set RFLAGS for SYSRET
swapgs
; Load the user data selector (0x20 | RPL 3) into the data segment registers
mov ax, 0x23
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov rsp, rsi ; User's stack
mov rax, rdx ; Argument passed in RAX
; 0x48 prefix + SYSRET = 64-bit SYSRET (same encoding trick as in syscall_handler)
db 0x48
sysret
; -------------------------------------------------
; System Calls
; -------------------------------------------------
[section .text.asm.syscall_handler]
; SYSCALL entry point (its address is written to LSTAR by start64_higher).
; In:  RAX = call number, arguments in RDI, RSI, RDX, R10, R8, R9
; Out: RAX = value returned by syscalls_handler
; RAX, RDI, RSI, RDX, [RCX/R10], R8, R9
EXPORT syscall_handler
    ; RCX = RIP, R11 = EFLAGS
    ; NOTE: If an interrupt happens between here and the load of `RSP`,
    ;  there can be state corruption.
    ; - RFLAGS has IF cleared (loaded state)
    ; - An NMI ignores that, but _should_ be using its own stack
    ; TODO TODO TODO Actually use the IST for NMI
    ; >>> Switch to kernel stack
    ; - The format of 'gs' is specified in arch/amd64/threads.rs (TLSData)
    swapgs
    mov [gs:0x10], rsp      ; Save user's RSP
    mov rsp, [gs:0x8]       ; and load kernels
    ; >>> Save user state
    SAVE rcx, r11           ; RCX = userland IP, R11 = userland EFLAGS
    ; >>> Push args (ready to be passed as slice)
    SAVE rdi, rsi, rdx, r10, r8, r9
    sti
    ; syscalls_handler(call number, pointer to pushed args, argument count)
    mov rdi, rax
    mov rsi, rsp
    mov rdx, 6
    [extern syscalls_handler]
    call syscalls_handler
    ; "pop" the arguments
    RESTORE rdi, rsi, rdx, r10, r8, r9
    ; All done
    ; >>> Restore RCX/R11 for sysret
    RESTORE rcx, r11
    ; >>> Mask interrupts for the rest of the exit path
    ; - They were enabled by the STI above. From the moment RSP holds the
    ;   user's stack pointer (and once SWAPGS has put the user's GS base
    ;   back), an interrupt taken in ring 0 would run on the user stack or
    ;   with the wrong GS base.
    ; - SYSRET reloads RFLAGS from R11, so the user's IF comes back then.
    cli
    ; >>> Restore user's SP
    mov rsp, [gs:0x10]
    ; >>> TODO: Restore user's FS
    ; >>> Restore GS
    swapgs
    ; sysretq (no opcode for it in nasm)
    ; - Returns to 64-bit mode, let's ignore compat mode
    db 0x48
    sysret
; -------------------------------------------------
; Helpers
; -------------------------------------------------
[section .text]
; __morestack - exported entry that simply diverts to abort
EXPORT __morestack
jmp abort
; abort - raise an invalid-opcode exception; if execution ever continues past
; it, mask interrupts and halt, repeating forever.
abort:
ud2
cli
hlt
jmp abort
;; void *memset(void *address, int value, size_t count)
;; RDI = Address
;; RSI = Value (only the low byte is used)
;; RDX = Count
;; Returns: RAX = Address (the original destination pointer)
;; Assumes DF is clear, so the fill runs upwards.
EXPORT memset
    mov eax, esi            ; AL = fill byte
    mov rcx, rdx
    rep stosb
    ; RDI now points one past the filled region; step back by the count to
    ; recover the start address for the return value
    mov rax, rdi
    sub rax, rdx
    ret
;; void *memcpy(void *dst, const void *src, size_t count)
;; RDI = Destination
;; RSI = Source
;; RDX = Count
;; Returns: RAX = Destination
;; Copies 8 bytes at a time when destination, source and count are all
;; multiples of 8, and byte by byte otherwise.
EXPORT memcpy
    mov rax, rdi            ; return value = original destination
    ; OR the three values together: the low three bits of the result are
    ; zero only if they are zero in every one of them
    mov ecx, edi
    or ecx, esi
    or ecx, edx
    test cl, 7
    mov rcx, rdx            ; MOV leaves the flags from TEST untouched
    jnz .bytewise
    shr rcx, 3              ; byte count -> qword count
    rep movsq
    ret
.bytewise:
    rep movsb
    ret
;; void *memmove(void *dst, const void *src, size_t count)
;; RDI = Destination
;; RSI = Source
;; RDX = Count
;; Returns: RAX = Destination
;; Overlap-safe copy: forwards (via memcpy) whenever that cannot overwrite
;; unread source bytes, backwards otherwise.
EXPORT memmove
    mov rax, rdi            ; return value for the paths that finish here
    cmp rdi, rsi
    je .ret                 ; if RDI == RSI, do nothing
    jb memcpy               ; if RDI < RSI, a forward copy is safe
    ; RDI > RSI: compute the end of the source in a scratch register so that
    ; RSI is still the real source pointer if control goes to memcpy
    lea rcx, [rsi + rdx]
    cmp rdi, rcx
    jae memcpy              ; if RDI >= RSI + RDX the regions don't overlap
    ; Reverse copy: point both registers at the last byte and set DF
    lea rsi, [rsi + rdx - 1]
    lea rdi, [rdi + rdx - 1]
    mov rcx, rdx
    std
    rep movsb
    cld                     ; leave DF clear for the rest of the kernel
.ret:
    ret
;; int memcmp(const void *a, const void *b, size_t count)
;; RDI = A
;; RSI = B
;; RDX = Count
;; Returns: RAX = 0 if equal, -1 if A sorts before B, 1 if A sorts after B
;; (bytes compared as unsigned values, at the first difference)
EXPORT memcmp
    xor eax, eax            ; result defaults to "equal"
    ; TEST presets ZF=1/CF=0 when the count is zero; REPE then runs no
    ; iterations and neither branch below is taken
    test rdx, rdx
    mov rcx, rdx
    repe cmpsb              ; flags come from [RSI] - [RDI], i.e. B - A
    jb .a_after             ; B < A
    ja .a_before            ; B > A
    ret
.a_before:
    dec rax                 ; 0 -> -1
    ret
.a_after:
    inc rax                 ; 0 -> 1
    ret
;; size_t strlen(const char *str)
;; RDI = str (NUL-terminated)
;; Returns: RAX = number of bytes before the terminating NUL
;; (the terminator itself is not counted, so an empty string gives 0)
EXPORT strlen
    xor eax, eax            ; RAX = bytes counted so far
.scan:
    cmp BYTE [rdi + rax], 0
    je .done
    inc rax
    jmp .scan
.done:
    ret
EXPORT _Unwind_Resume
; Unwinding is not supported here: spin in place forever
.spin:
    jmp .spin
%include "Core/arch/amd64/stubs.inc.asm"
[section .padata]
; Statically built boot page tables and the initial kernel stacks.
; Every table is 512 eight-byte entries. Table pointers are stored as
; (symbol - KERNEL_BASE) + 3, i.e. the table's low address with the Present
; and Writable bits set.
[global InitialPML4]
InitialPML4: ; Covers 256 TiB (Full 48-bit Virtual Address Space)
dd InitialPDP - KERNEL_BASE + 3, 0 ; Identity Map Low 4Mb
times 0xA0*2-1 dq 0 ; Entries 1 .. 0x13F
; Stacks at 0xFFFFA... (entry 0x140)
dd StackPDP - KERNEL_BASE + 3, 0
times 512-4-($-InitialPML4)/8 dq 0 ; < dq until hit 512-4
dd InitialPML4 - KERNEL_BASE + 3, 0 ; Entry 508: the PML4 mapped into itself
dq 0
dq 0
dd HighPDP - KERNEL_BASE + 3, 0 ; Map Low 4Mb to kernel base
[global InitialPDP]
InitialPDP: ; Covers 512 GiB
dd InitialPD - KERNEL_BASE + 3, 0
times 511 dq 0
StackPDP:
dd StackPD - KERNEL_BASE + 3, 0
times 511 dq 0
HighPDP: ; Covers 512 GiB
times 510 dq 0
dd InitialPD - KERNEL_BASE + 3, 0 ; Entry 510: same page directory as the low mapping
dq 0
[global InitialPD]
InitialPD: ; Covers 1 GiB
; 0x183 = Present | Writable | 2MiB page | Global
dd 0x000000 + 0x183,0 ; Global, 2MiB
dd 0x200000 + 0x183,0 ; Global, 2MiB
times 510 dq 0
StackPD:
dd KStackPT - KERNEL_BASE + 3, 0
times 511 dq 0
KStackPT: ; Covers 2 MiB
; Initial stack: INITIAL_KSTACK_SIZE-1 pages
; - The first entry is left unmapped, below the stack pages
dq 0
%assign i 0
%rep INITIAL_KSTACK_SIZE-1
; 0x103 = Present | Writable | Global
dd InitialKernelStack - KERNEL_BASE + i*0x1000 + 0x103, 0
%assign i i+1
%endrep
times 512-INITIAL_KSTACK_SIZE dq 0
; Backing memory for the pages mapped by KStackPT
InitialKernelStack:
times 0x1000*(INITIAL_KSTACK_SIZE-1) db 0 ; INITIAL_KSTACK_SIZE-1 pages
[global EmergencyStack]
times 0x1000*(INITIAL_KSTACK_SIZE-1) db 0 ; INITIAL_KSTACK_SIZE-1 pages
; Label sits at the END of the block above, i.e. at the top of that stack
EmergencyStack:
[section .rodata]
[section .data]
; Boot information pointer saved by start / start_uefi. The low dword is
; filled in at boot; the high dword is preset to 0xFFFFFFFF.
EXPORT s_multiboot_pointer
dd 0
dd 0xFFFFFFFF
; Signature value saved by start (from EAX) or start_uefi (from ECX)
EXPORT s_multiboot_signature
dd 0
; Global Descriptor Table. Each descriptor is written as two dwords (low, high).
EXPORT GDT
dd 0, 0 ; 0x00: Null descriptor
dd 0x00000000, 0x00209A00 ; 0x08: 64-bit Code
dd 0x00000000, 0x00009200 ; 0x10: 64-bit Data
dd 0x00000000, 0x0040FA00 ; 0x18: 32-bit User Code
dd 0x00000000, 0x0040F200 ; 0x20: User Data
dd 0x00000000, 0x0020FA00 ; 0x28: 64-bit User Code
dd 0x00000000, 0x0000F200 ; 0x30: User Data (64 version)
.first_tss:
; One 16-byte TSS descriptor per CPU; base and limit are zero here
; (presumably filled in at runtime - confirm)
times MAX_CPUS dd 0, 0x00008900, 0, 0 ; 0x38+16*n: TSS n
; GDT pointer used before the switch to the high mapping: limit, then the
; GDT's low (KERNEL_BASE-relative) address padded to 64 bits
GDTPtr:
dw $-GDT-1
dd GDT - KERNEL_BASE
dd 0
; GDT pointer loaded by start64_higher: same limit, full 64-bit address
GDTPtr2:
dw GDTPtr-GDT-1
dq GDT
EXPORT IDT
; 64-bit Interrupt Gate, CS = 0x8, IST0 (Disabled)
; - Handler offsets are zero and the Present bit is clear in this template
times 256 dd 0x00080000, 0x00000E00, 0, 0
IDTPtr:
dw 256*16-1
dq IDT
; Value returned by prep_tls for TID0, stored by start64_higher
EXPORT s_tid0_tls_base
dq 0
[section .bss]
; Task State Segments: MAX_CPUS entries of tss.SIZE bytes each.
; task_switch writes the rsp0 field of the first one.
EXPORT TSSes
times MAX_CPUS resb tss.SIZE
; vim: ft=nasm