Skip to content

Commit

Permalink
Improve performance of oam_table.ml: 14fps -> 24fps
Browse files Browse the repository at this point in the history
  • Loading branch information
linoscope committed Oct 28, 2021
1 parent 4545c94 commit 47989b7
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 90 deletions.
1 change: 0 additions & 1 deletion lib/camlboy.ml
Expand Up @@ -104,7 +104,6 @@ module Make (Cartridge : Cartridge_intf.S) = struct
let oam_table = Oam_table.create
~start_addr:(of_int 0xFE00)
~end_addr:(of_int 0xFE9F)
~oam_ram:(Ram.create ~start_addr:(of_int 0xFE00) ~end_addr:(of_int 0xFE9F))
in
let lcd_stat = Lcd_stat.create ~addr:lcd_stat_addr in
let joypad = Joypad.create
Expand Down
64 changes: 32 additions & 32 deletions lib/gpu/gpu.ml
Expand Up @@ -129,38 +129,38 @@ let render_sprite_line t ly =
| `_8x16 -> 16
in
Oam_table.get_all_sprites t.oam
|> List.filter (fun sprite -> sprite.y_pos <= ly && ly <= sprite.y_pos + y_sprite_size - 1)
|> List.iter (fun sprite ->
let row = ly - sprite.y_pos in
let pallete = match sprite.pallete with
| `OBP0 -> t.obp0
| `OBP1 -> t.obp1
in
for col = 0 to 7 do
let lx = sprite.x_pos + col in
if lx < 0 || lx >= screen_w then
()
else
let color_id = Tile_data.get_pixel t.td
~area:Area1
~index:sprite.tile_index
~row:(if sprite.y_flip then y_sprite_size - row - 1 else row)
~col:(if sprite.x_flip then 7 - col else col)
in
match color_id with
| ID_00 ->
() (* transparent *)
| ID_01 | ID_10 | ID_11 ->
match sprite.priority, t.frame_buffer.(ly).(lx) with
| `Sprite_top, _
| _, `White ->
let color = Pallete.lookup pallete color_id in
t.frame_buffer.(ly).(lx) <- color
| `Sprite_bottom, `Black
| `Sprite_bottom, `Dark_gray
| `Sprite_bottom, `Light_gray ->
()
done)
|> Array.iter (fun sprite ->
if sprite.y_pos <= ly && ly <= sprite.y_pos + y_sprite_size - 1 then
let row = ly - sprite.y_pos in
let pallete = match sprite.pallete with
| `OBP0 -> t.obp0
| `OBP1 -> t.obp1
in
for col = 0 to 7 do
let lx = sprite.x_pos + col in
if lx < 0 || lx >= screen_w then
()
else
let color_id = Tile_data.get_pixel t.td
~area:Area1
~index:sprite.tile_index
~row:(if sprite.y_flip then y_sprite_size - row - 1 else row)
~col:(if sprite.x_flip then 7 - col else col)
in
match color_id with
| ID_00 ->
() (* transparent *)
| ID_01 | ID_10 | ID_11 ->
match sprite.priority, t.frame_buffer.(ly).(lx) with
| `Sprite_top, _
| _, `White ->
let color = Pallete.lookup pallete color_id in
t.frame_buffer.(ly).(lx) <- color
| `Sprite_bottom, `Black
| `Sprite_bottom, `Dark_gray
| `Sprite_bottom, `Light_gray ->
()
done)

let render_line t =
let ly = Lcd_position.get_ly t.lp in
Expand Down
111 changes: 61 additions & 50 deletions lib/gpu/oam_table.ml
Expand Up @@ -8,66 +8,77 @@ type sprite = {
y_flip : bool;
x_flip : bool;
pallete : [`OBP0 | `OBP1];
tile_vram_bank : [`Bank0 | `Bank1];
pallete_num : int;
} [@@deriving show]

(* State of the OAM table: sprite storage plus the address range the table
   was created with. *)
type t = {
(* Raw byte storage; [get_sprite] decodes sprite attributes out of it. *)
oam_ram : Ram.t;
(* Decoded sprites, one slot per sprite; each slot stands for 4 consecutive
   addresses (see the [offset / 4] indexing in [read_byte]/[write_byte]). *)
sprites : sprite array;
(* First address of the range; byte offsets are computed relative to it. *)
start_addr : uint16;
(* Last address of the range; [accepts] treats it as inclusive. *)
end_addr : uint16;
}

(* Builds a table on top of the caller-supplied [oam_ram], recording the
   address range it was created with. *)
let create ~start_addr ~end_addr ~oam_ram =
  { oam_ram; start_addr; end_addr }

(* Reads the byte at [addr] by delegating to the backing RAM. *)
let read_byte t addr = Ram.read_byte t.oam_ram addr
(* Builds an OAM table of 40 sprite slots for the given address range.
   Every slot initially refers to the same default sprite (position 0/0,
   tile 0, no flips, `Sprite_top, `OBP0, `Bank0, palette number 0); this
   sharing is harmless because [write_byte] replaces a slot with a modified
   copy instead of mutating the record. *)
let create ~start_addr ~end_addr =
  let sprite_count = 40 in
  let blank =
    { y_pos = 0;
      x_pos = 0;
      tile_index = Uint8.zero;
      priority = `Sprite_top;
      y_flip = false;
      x_flip = false;
      pallete = `OBP0;
      tile_vram_bank = `Bank0;
      pallete_num = 0 }
  in
  { sprites = Array.make sprite_count blank; start_addr; end_addr }

(* Stores [data] at [addr] by delegating to the backing RAM. *)
let write_byte t ~addr ~data = Ram.write_byte t.oam_ram ~addr ~data
(* Re-encodes the byte at [addr] from the decoded sprite it belongs to.
   A sprite covers 4 consecutive addresses:
     byte 0 = y_pos + 16, byte 1 = x_pos + 8, byte 2 = tile index,
     byte 3 = attribute flags (priority, y flip, x flip, palette, VRAM bank,
              then the three bits of the palette number, high bit first).
   The sprite slot is looked up before the byte selector is examined, so an
   address beyond the table raises from the array access. *)
let read_byte t addr =
  let rel = Uint16.to_int addr - Uint16.to_int t.start_addr in
  let s = t.sprites.(rel / 4) in
  match rel mod 4 with
  | 0 -> Uint8.of_int (s.y_pos + 16)
  | 1 -> Uint8.of_int (s.x_pos + 8)
  | 2 -> s.tile_index
  | 3 ->
    let is_bottom = (s.priority = `Sprite_bottom) in
    let uses_obp1 = (s.pallete = `OBP1) in
    let uses_bank1 = (s.tile_vram_bank = `Bank1) in
    (* Tests a single bit of the palette number. *)
    let pal_bit mask = s.pallete_num land mask <> 0 in
    Bit_util.byte_of_bitflags
      is_bottom
      s.y_flip
      s.x_flip
      uses_obp1
      uses_bank1
      (pal_bit 0b100)
      (pal_bit 0b010)
      (pal_bit 0b001)
  | _ -> assert false

(* Address-membership check for this table; delegates entirely to the
   backing RAM's own [accepts]. *)
let accepts t = Ram.accepts t.oam_ram
(* Decodes [data] into the sprite that owns [addr] and stores the result.
   Byte 0 and 1 of a sprite hold y and x with a bias of 16 and 8, byte 2 is
   the tile index and byte 3 packs the attribute flags. The affected slot is
   replaced by a modified copy of its record. *)
let write_byte t ~addr ~data =
  let rel = Uint16.to_int addr - Uint16.to_int t.start_addr in
  let raw = Uint8.to_int data in
  let slot = rel / 4 in
  (* The slot is only touched from inside a match arm, i.e. after the byte
     selector has been validated. *)
  let update f = t.sprites.(slot) <- f t.sprites.(slot) in
  match rel mod 4 with
  | 0 -> update (fun s -> { s with y_pos = raw - 16 })
  | 1 -> update (fun s -> { s with x_pos = raw - 8 })
  | 2 -> update (fun s -> { s with tile_index = data })
  | 3 ->
    let (b7, y_flip, x_flip, b4, b3, b2, b1, b0) =
      Bit_util.bitflags_of_byte data
    in
    (* Low three flag bits form the palette number, b2 being the high bit. *)
    let bit flag shift = Bool.to_int flag lsl shift in
    let pallete_num = bit b2 2 lor bit b1 1 lor bit b0 0 in
    update (fun s ->
      { s with
        priority = (if b7 then `Sprite_bottom else `Sprite_top);
        y_flip;
        x_flip;
        pallete = (if b4 then `OBP1 else `OBP0);
        tile_vram_bank = (if b3 then `Bank1 else `Bank0);
        pallete_num })
  | _ -> assert false

(* Decodes sprite number [index] from the four consecutive bytes it occupies
   in [oam_ram], starting at [start_addr + index * 4]: y (stored with a bias
   of 16), x (stored with a bias of 8), tile index, and attribute flags, of
   which only bit 7 (priority), the two flip bits and bit 4 (palette) are
   used here. *)
let get_sprite t ~index =
  let base = Uint16.to_int t.start_addr + (index * 4) in
  (* Reads the k-th byte of this sprite's entry. *)
  let byte_at k = Ram.read_byte t.oam_ram (Uint16.of_int (base + k)) in
  let y_pos = Uint8.to_int (byte_at 0) - 16 in
  let x_pos = Uint8.to_int (byte_at 1) - 8 in
  let tile_index = byte_at 2 in
  let (b7, y_flip, x_flip, b4, _, _, _, _) =
    Bit_util.bitflags_of_byte (byte_at 3)
  in
  { y_pos;
    x_pos;
    tile_index;
    priority = (if b7 then `Sprite_bottom else `Sprite_top);
    y_flip;
    x_flip;
    pallete = (if b4 then `OBP1 else `OBP0) }
(* True when [addr] lies between [start_addr] and [end_addr], both ends
   included. *)
let accepts { start_addr; end_addr; _ } addr =
  Uint16.(start_addr <= addr && addr <= end_addr)

(* TODO: Optimize *)
(* The 40 sprite indexes, 0 to 39, built once when the module is loaded. *)
let sprite_indexes = List.init 40 Fun.id

(* Decodes every sprite with [get_sprite] and returns them as a fresh list
   in index order. *)
let get_all_sprites t =
  List.map (fun index -> get_sprite t ~index) sprite_indexes
(* Returns the table's own sprite array (no copy is made), so later writes
   to the table are visible through the returned array. *)
let get_all_sprites { sprites; _ } = sprites

(* Writes [data] to the byte located [offset] bytes past the table's
   [start_addr]; the absolute address is computed in [Uint16] arithmetic. *)
let write_with_offset t ~offset ~data =
Ram.write_byte t.oam_ram ~addr:Uint16.(t.start_addr + of_int offset) ~data
write_byte t ~addr:Uint16.(t.start_addr + of_int offset) ~data
6 changes: 4 additions & 2 deletions lib/gpu/oam_table.mli
Expand Up @@ -10,11 +10,13 @@ type sprite = {
y_flip : bool;
x_flip : bool;
pallete : [`OBP0 | `OBP1];
tile_vram_bank : [`Bank0 | `Bank1];
pallete_num : int;
} [@@deriving show]

(* Creates a table for the address range [start_addr]..[end_addr], backed by
   the given [oam_ram]. *)
val create : start_addr:uint16 -> end_addr:uint16 -> oam_ram:Ram.t -> t
(* Creates a table for the address range [start_addr]..[end_addr]. *)
val create : start_addr:uint16 -> end_addr:uint16 -> t

(* All sprites of the table, as a list. *)
val get_all_sprites : t -> sprite list
(* All sprites of the table, as an array indexed by sprite number. *)
val get_all_sprites : t -> sprite array

(* Writes [data] to the byte [offset] bytes past the table's start
   address. *)
val write_with_offset : t -> offset:int -> data:uint8 -> unit

Expand Down
1 change: 1 addition & 0 deletions lib/gpu/tile_data.ml
Expand Up @@ -18,6 +18,7 @@ let create ~tile_data_ram ~area1_start_addr ~area0_start_addr = {
area0_start_addr;
}

(* TODO: Optimize *)
let get_pixel t ~area ~(index:uint8) ~row ~col =
let row_offset = 2 * row |> Uint16.of_int in
let low_bit_row_addr = match area with
Expand Down
1 change: 0 additions & 1 deletion test/unit_tests/test_gpu.ml
Expand Up @@ -24,7 +24,6 @@ let create ()=
let oam_table = Oam_table.create
~start_addr:(of_int 0xFE00)
~end_addr:(of_int 0xFE9F)
~oam_ram:(Ram.create ~start_addr:(of_int 0xFE00) ~end_addr:(of_int 0xFE9F))
in
Interrupt_controller.write_byte ic ~addr:(Uint16.of_int 0xFFFF) ~data:(Uint8.of_int 0xFF);
Gpu.create
Expand Down
29 changes: 25 additions & 4 deletions test/unit_tests/test_oam_table.ml
Expand Up @@ -6,9 +6,29 @@ let create () =
Oam_table.create
~start_addr:(of_int 0xFE00)
~end_addr:(of_int 0xFE9F)
~oam_ram:(Ram.create ~start_addr:(of_int 0xFE00) ~end_addr:(of_int 0xFE9F))

let%expect_test "test" =
(* Writes the four bytes of sprite #2 and checks that reading the same
   addresses back yields the bytes that were written. *)
let%expect_test "test write then read" =
  let t = create () in

  (* Address of the first byte of sprite #2 (4 bytes per sprite). *)
  let base = Uint16.(of_int 0xFE00 + of_int 2 * of_int 4) in
  let addrs =
    [ base;
      Uint16.(base + one);
      Uint16.(base + of_int 2);
      Uint16.(base + of_int 3) ]
  in
  let bytes = List.map Uint8.of_int [ 0x78; 0x4D; 0x90; 0x30 ] in
  List.iter2 (fun addr data -> Oam_table.write_byte t ~addr ~data) addrs bytes;

  List.iter
    (fun addr ->
      Printf.printf "%s " (Uint8.show (Oam_table.read_byte t addr)))
    addrs;

  [%expect {| $78 $4D $90 $30 |}]


let%expect_test "test get_all_sprites" =
let t = create () in

let offset = Uint16.(of_int 0xFE00 + of_int 2 * of_int 4) in
Expand All @@ -18,10 +38,11 @@ let%expect_test "test" =
Oam_table.write_byte t ~addr:Uint16.(offset + of_int 3) ~data:Uint8.(of_int 0x30);

Oam_table.get_all_sprites t
|> (fun l -> List.nth l 2)
|> (fun a -> Array.get a 2)
|> Oam_table.show_sprite
|> print_endline;

[%expect {|
{ Oam_table.y_pos = 104; x_pos = 69; tile_index = $90;
priority = `Sprite_top; y_flip = false; x_flip = true; pallete = `OBP1 } |}]
priority = `Sprite_top; y_flip = false; x_flip = true; pallete = `OBP1;
tile_vram_bank = `Bank0; pallete_num = 0 } |}]

0 comments on commit 47989b7

Please sign in to comment.