Permalink
Cannot retrieve contributors at this time
304 lines (273 sloc)
6.51 KB
| ; | |
| ; generic vram update engine for GB | |
| ; | |
| ; Copyright 2018-2019 Damian Yerrick | |
| ; | |
| ; This software is provided 'as-is', without any express or implied | |
| ; warranty. In no event will the authors be held liable for any damages | |
| ; arising from the use of this software. | |
| ; | |
| ; Permission is granted to anyone to use this software for any purpose, | |
| ; including commercial applications, and to alter it and redistribute it | |
| ; freely, subject to the following restrictions: | |
| ; | |
| ; 1. The origin of this software must not be misrepresented; you must not | |
| ; claim that you wrote the original software. If you use this software | |
| ; in a product, an acknowledgment in the product documentation would be | |
| ; appreciated but is not required. | |
| ; 2. Altered source versions must be plainly marked as such, and must not be | |
| ; misrepresented as being the original software. | |
| ; 3. This notice may not be removed or altered from any source distribution. | |
| ; | |
| include "src/hardware.inc" | |
| include "src/global.inc" | |
; Each packet starts with a 4-byte header:
; dst low, dst high, count/flags, first byte
;
; The count/flags byte:
; 7654 3210
; ||++-++++- lengthvalue (length minus 1)
; |+-------- Type (0: literal; 1: run)
; +--------- Add (0: 1; 1: 32)
;
; +1 run: write first byte lengthvalue+1 times, adding 1 to dst
;   after each write
; +32 run: write first byte lengthvalue+1 times, adding 32 to dst
;   after each write
; +1 literal: write first byte if lengthvalue is even (that is, if the
;   length is odd; otherwise the first byte is ignored),
;   then copy next ceil(lengthvalue / 2) * 2 bytes, adding 1 to dst
;   after each write
; +32 literal: write first byte if lengthvalue is even (that is, if the
;   length is odd; otherwise the first byte is ignored),
;   then copy next ceil(lengthvalue / 2) * 2 bytes, adding 32 to dst
;   after each write
; Longest packet, in bytes, in each direction.
; HORIZONTAL_MAX feeds the entry-offset arithmetic of the horizontal
; dispatch paths; VERTICAL_MAX sets the repeat counts of the vertical
; slides as well as their entry-offset arithmetic.
HORIZONTAL_MAX = 64
VERTICAL_MAX = 32

; LOG2_VERTICAL_MAX is 5 while VERTICAL_MAX is at most 32 and 6 above
; that.  It is only used to choose the alignment of the two vertical
; slide sections.
if VERTICAL_MAX > 32
LOG2_VERTICAL_MAX = 6
else
LOG2_VERTICAL_MAX = 5
endc
; Compatibility shim, assembled only when vbacompat is nonzero:
; a bare reti at $0048 makes `di halt` safe on VisualBoyAdvance 1.7.2.
if vbacompat
section "stat_handler", ROM0[$48]
  reti
endc
; All Popslide variables share one 256-byte-aligned page, so code can
; move between them by changing only the low byte of a pointer.
section "popslide_buf", WRAM0, align[8]

; Spare bytes directly below the packet buffer.
; NOTE(review): presumably room for pushes that happen while SP points
; at the start of the buffer - confirm against callers.
popslide_headroom:
  ds 8

; Packet buffer that popslide_blit reads through the stack pointer
popslide_buf::
  ds 192

; Fill level: low byte of the address of the first unused buffer byte
popslide_used::
  ds 1

; Holds the caller's SP while SP is borrowed as a data pointer
popslide_sp_save:
  ds 2
section "popslide_run_h",ROM0,align[6]

; Horizontal run slide: HORIZONTAL_MAX one-byte `ld [hl+],a` stores
; followed by a jump back to the packet dispatcher.
;
; The dispatcher (.is_run_h) enters at
; run_h_base + HORIZONTAL_MAX - length with A = value to write and
; HL = destination, so exactly `length` stores execute.
;
; The repeat count is taken from HORIZONTAL_MAX so that the slide and
; the dispatcher's offset arithmetic cannot drift apart.  align[6]
; keeps all 64 stores inside one 256-byte page, which lets the
; dispatcher compute only the low byte of the entry address; a larger
; HORIZONTAL_MAX would need a larger alignment.
run_h_base:
  rept HORIZONTAL_MAX
  ld [hl+],a
  endr
  jp popslide_next_packet
section "popslide_run_v", ROM0, align[LOG2_VERTICAL_MAX + 1]

; Vertical run slide: VERTICAL_MAX stores of A to [HL], stepping HL by
; DE between stores (popslide_blit loads DE with 32).
; Each of the first VERTICAL_MAX - 1 rows is two one-byte instructions;
; the final store needs no step and is followed by the jump back to
; the dispatcher.  The dispatcher (.is_run_v) enters two bytes earlier
; for every additional byte of run length.
run_v_base:
  ; The repeat count expression is kept exactly as originally written.
  rept VERTICAL_MAX + (-1)
  ld [hl], a
  add hl, de
  endr
  ld [hl], a
  jp popslide_next_packet
section "popslide_literal_h",ROM0,align[8]

; Horizontal literal slide.  Each repetition pops two data bytes from
; the packet buffer (through SP) and stores them at HL and HL+1.
; One repetition is five one-byte instructions, so the dispatcher
; enters 5 * (number of byte pairs) bytes before the end of the slide.
; The slide holds HORIZONTAL_MAX / 2 pairs and falls straight through
; into popslide_next_packet.  align[8] keeps the whole slide in one
; 256-byte page so only the low byte of the entry address is computed.
literal_h_base::
  rept HORIZONTAL_MAX / 2
  pop bc
  ld a,c
  ld [hl+],a
  ld a,b
  ld [hl+],a
  endr

;;
; Packet dispatcher.  Every slide ends by coming back here.
;
; On entry SP points at the next packet header in the buffer and
; DE = 32 (the step used by vertical packets).  A destination whose
; high byte has bit 7 clear terminates the list: the saved stack
; pointer is restored and control returns to whoever called
; popslide_blit.
;
; Slides are entered with `push bc` / `ret`, a computed jump to BC.
; The push lands on the two header bytes that were just popped, so
; the buffer contents are not preserved across a blit.
;
; Packet lengths must not exceed HORIZONTAL_MAX (horizontal) or
; VERTICAL_MAX (vertical); longer lengths would compute an entry
; address outside the slide.
popslide_next_packet:
  pop hl                    ; HL = destination address
  bit 7,h
  jr nz,.not_done

  ; Restore old stack pointer
  ; Optimization thanks to NieDzejkob in gbdev discord
  ld sp, popslide_sp_save
  pop hl
  ld sp, hl
  ret

.not_done:
  pop bc                    ; C = count/flags, B = first byte
  bit 7,c
  jr nz,.is_v
  bit 6,c
  jr nz,.is_run_h

  ; $00-$3F: Literal of length C+1 written horizontally
  inc c                     ; C = length
  bit 0,c
  jr z,.literal_h_even
  ; Odd length: the header's first byte is real data
  ld a,b
  ld [hl+],a
.literal_h_even:
  srl c                     ; C = number of byte pairs left
  jr z,popslide_next_packet
  ld a,c  ; A = C = number of byte pairs
  add a
  add a
  add c   ; A = 5 * number of copies
  cpl     ; 255 - 5 * (number of copies)
  ; A = low(end of slide) - 5 * (number of copies)
  sub 255-(low(literal_h_base) + (HORIZONTAL_MAX / 2) * 5)
  ld c,a
  ld b,high(literal_h_base)
  push bc
  ret

.is_run_h:
  ; $40-$7F: C-63 bytes of value B written horizontally
  ; Entry = run_h_base + HORIZONTAL_MAX - length
  ;       = run_h_base + HORIZONTAL_MAX + 63 - C
  ; (equal to the former 191 - HORIZONTAL_MAX when HORIZONTAL_MAX is
  ; 64, and still correct if the constant is changed)
  ld a,low(run_h_base + 63 + HORIZONTAL_MAX)
  sub c
  ld c,a
  ld a,b                    ; A = value to write
  ld b,high(run_h_base)
  push bc
  ret

.is_v:
  bit 6,c
  jr nz,.is_run_v

  ; $80-$BF: Literal of length C-127 written vertically
  inc c                     ; bit 0 of C = bit 0 of length
  bit 0,c
  jr z,.literal_v_even
  ; Odd length: the header's first byte is real data
  ld a,b
  ld [hl],a
  add hl,de
.literal_v_even:
  res 7,c                   ; drop the direction flag; res leaves flags alone
  srl c                     ; C = number of byte pairs left
  jr z,popslide_next_packet
  ld a,c  ; A = C = number of copies
  add a
  add a
  add a
  sub c   ; A = 7 * number of copies
  cpl     ; 255 - 7 * (number of copies)
  ; A = low(end of slide) - 7 * (number of copies)
  sub 255-(low(literal_v_base) + (VERTICAL_MAX / 2) * 7)
  ld c,a
  ld b,high(literal_v_base)
  push bc
  ret

.is_run_v:
  ; $C0-$FF: C-191 bytes of value B written vertically
  ; Entry = run_v_base + 2 * (VERTICAL_MAX - length)
  ;       = run_v_base + 2 * VERTICAL_MAX + 382 - 2 * C,
  ; and 382 is congruent to -130 modulo 256.
  ld a,low(run_v_base) + 2 * VERTICAL_MAX - 130
  sub c
  sub c
  ld c,a
  ld a,b                    ; A = value to write
  ld b,high(run_v_base)
  push bc
  ret
section "popslide_literal_v", ROM0, align[LOG2_VERTICAL_MAX + 2]

; Vertical literal slide.  Each repetition pops two data bytes from the
; packet buffer (through SP) and stores them one after the other,
; stepping HL by DE after each store (popslide_blit loads DE with 32).
; One repetition is seven one-byte instructions, so the dispatcher
; enters 7 * (number of byte pairs) bytes before the closing jump.
literal_v_base:
  rept VERTICAL_MAX / 2
  pop bc
  ld a, c
  ld [hl], a
  add hl, de
  ld a, b
  ld [hl], a
  add hl, de
  endr
  jp popslide_next_packet
section "popslide_public", ROM0

;;
; Resets the fill level to the start of popslide_buf, then tail-calls
; memset_tiny with HL = popslide_buf, C = size of the buffer in bytes
; and A = 0.  The intent is to clear the entire Popslide buffer so
; that no uninitialized bytes are ever read.
; NOTE(review): memset_tiny is defined elsewhere; this assumes it
; writes A to C bytes starting at HL - confirm.
popslide_init::
  ld hl, popslide_buf
  ld c, low(popslide_used - popslide_buf)
  ; Fill level = low byte of the buffer's start address
  ld a, l
  ld [popslide_used], a
  xor a
  jp memset_tiny
;;
; Appends a terminator to the packet list, resets the fill level for
; the next transaction, and then runs the list.
; The terminator is two zero bytes written at the old fill level, so
; the fill level must leave room for them inside the buffer.
; Entering at .do_a instead expects A = fill level to terminate at and
; HL = popslide_used.
popslide_terminate_blit::
  ld hl, popslide_used
  ld a, [hl]                    ; A = fill level before the reset
.do_a:
  ld [hl], low(popslide_buf)    ; next transaction starts at the front
  ld l, a                       ; HL = first unused byte (same page)
  xor a
  ld [hli], a                   ; zero destination address = end of list
  ld [hl], a
  ; Execution continues into popslide_blit

;;
; Runs the packet list in popslide_buf.  The caller's SP is saved in
; popslide_sp_save and SP is pointed at the buffer so that packets can
; be read with pop; the dispatcher restores SP and returns to the
; caller when it reaches the terminator.
popslide_blit::
  ld de, 32                     ; step between stores of vertical packets
  ld [popslide_sp_save], sp
  ld sp, popslide_buf
  jp popslide_next_packet
; Hblank bulk copy
; for copying bulk data (circa 1K per frame) without needing to
; disable rendering

;;
; Copies bulk data during horizontal blanking periods,
; disabling interrupts for the duration.
;
; DMG scanline is 456 dots, of which 80 are OAM scan, 176 plus
; window plus sprites are draw, and the rest is hblank.
; Sprites are 6-11 dots each depending on relative alignment with
; the background, and exact alignment with a background tile is
; slowest.  Window is about 6 dots if enabled on a scanline.
; This leaves at least 84 dots, or 21 cycles at 4 dots per cycle.
; We can write to VRAM during the 21+ cycles of hblank plus the
; 20 cycles of the following line's OAM scan.
;
; Eight bytes are copied per scanline.  The loop tests C after each
; unit, so C = 0 on entry copies 256 units.  The routine sets bit 7 of
; rLCDC (display on) before copying.  rIE and rSTAT are restored on
; exit; A, B, DE and HL are clobbered.
;
; @param HL source address (usually ROM or WRAM)
; @param DE destination address (usually VRAM)
; @param C number of 8-byte units to copy (usually 1 to 128)
; @return C = 0; IME on
popslide_hblank_copy::
  ; Save which interrupts were enabled and the old stack pointer
  di
  ldh a, [rIE]
  push af
  ldh a, [rSTAT]
  push af
  ld [popslide_sp_save],sp

  ; Set hblank as only interrupt that ends HALT.
  ; ldh is spelled out, like every other I/O access in this routine,
  ; so the encoding does not depend on the assembler's automatic
  ; ld-to-ldh optimization setting.
  ld a, IEF_LCDC
  ldh [rIE], a
  ld a, STATF_MODE00
  ldh [rSTAT], a

  ; Point the stack at the source
  ld sp,hl
  ld hl, rLCDC
  set 7, [hl]
  ; HL = destination from here on
  ld h,d
  ld l,e

.rowloop:
  ; Discard stale interrupt requests so that halt waits for the
  ; next hblank
  xor a
  ldh [rIF], a

  ; Read first four bytes into A, B, E, D before hblank
  pop de
  ld b, d
  ld a, e
  pop de
  ; IME is off, so halt resumes at the next instruction once the
  ; hblank STAT request is raised, without calling a handler.
  ; NOTE(review): some RGBASM versions insert a nop after halt by
  ; default - confirm the build settings before adjusting timing.
  halt

  ; Write four already-popped bytes (14 cycles)
  ld [hl+], a
  ld a, b
  ld [hl+], a
  ld a, e
  ld [hl+], a
  ld a, d
  ld [hl+], a

  ; Pop and write next four bytes (18 cycles)
  pop de
  ld a, e
  ld [hl+], a
  ld a, d
  ld [hl+], a
  pop de
  ld a, e
  ld [hl+], a
  ld a, d
  ld [hl+], a

  dec c
  jr nz, .rowloop

  ; Restore the stack and interrupt settings
  ; Optimization thanks to NieDzejkob in gbdev
  ld sp, popslide_sp_save
  pop hl
  ld sp, hl
  pop af
  ldh [rSTAT], a
  pop af
  ldh [rIE],a
  ; reti returns and turns IME back on
  reti