From 3b7e82eea4e38ba7e3f1c317293b9f4259f745ea Mon Sep 17 00:00:00 2001 From: buffi Date: Sun, 2 Apr 2023 17:35:30 +0200 Subject: [PATCH] video/epic12.cpp: Improved CV1000 blitter timings based on hardware testing, and removed unsafe mode. (#10849) * Fixed an off-by-one bug when setting dst_x_end and dst_y_end which caused destination writes to be one pixel too big. * Corrected calculations to utilize both source and destination VRAM row counts. * Updated VRAM access overhead. * Keep track of when the blitter is idle, since long sequences of clipped draws can cause the blitter to not do any work while remaining busy. * Removed the edge case for "pixel widths need to be evenly divisible by 8" until more examples can be studied. * Pad draws to even 4-pixel boundaries, since VRAM accesses are always 4 pixels at a time. * Also added documentation for 6 unhandled Blitter command offsets (see blitter_w). Thanks rtw for looking at these. * misc/cv1k.cpp: Updated some notes. --- src/devices/video/epic12.cpp | 388 ++++++++++++++------------------- src/devices/video/epic12.h | 46 ++-- src/devices/video/epic12in.hxx | 8 - src/mame/misc/cv1k.cpp | 34 ++- 4 files changed, 204 insertions(+), 272 deletions(-) diff --git a/src/devices/video/epic12.cpp b/src/devices/video/epic12.cpp index 357651c661703..cf14e6abc76eb 100644 --- a/src/devices/video/epic12.cpp +++ b/src/devices/video/epic12.cpp @@ -1,13 +1,30 @@ // license:BSD-3-Clause // copyright-holders:David Haywood, Luca Elia, MetalliC -/* emulation of Altera Cyclone EP1C12 FPGA programmed as a blitter */ +// emulation of Altera Cyclone EP1C12 FPGA programmed as a blitter #include "emu.h" #include "epic12.h" + #include "screen.h" +#define LOG_DEBUG (1U << 1) + +//#define VERBOSE (LOG_DEBUG) +#include "logmacro.h" + +#define LOGDBG(...) 
LOGMASKED(LOG_DEBUG, __VA_ARGS__) + + DEFINE_DEVICE_TYPE(EPIC12, epic12_device, "epic12", "EPIC12 Blitter") +static constexpr int EP1C_VRAM_CLK_NANOSEC = 13; +static constexpr int EP1C_SRAM_CLK_NANOSEC = 20; +static constexpr int EP1C_VRAM_H_LINE_PERIOD_NANOSEC = 63600; +static constexpr int EP1C_VRAM_H_LINE_DURATION_NANOSEC = 2160; +static constexpr int EP1C_FRAME_DURATION_NANOSEC = 16666666; +static constexpr int EP1C_DRAW_OPERATION_SIZE_BYTES = 20; +static constexpr int EP1C_CLIP_OPERATION_SIZE_BYTES = 2; + epic12_device::epic12_device(const machine_config &mconfig, const char *tag, device_t *owner, u32 clock) : device_t(mconfig, EPIC12, tag, owner, clock) , device_video_interface(mconfig, *this) @@ -16,8 +33,6 @@ epic12_device::epic12_device(const machine_config &mconfig, const char *tag, dev , m_maincpu(*this, finder_base::DUMMY_TAG) , m_port_r_cb(*this) { - m_is_unsafe = 0; - m_delay_scale = 0; m_blitter_request = nullptr; m_blitter_delay_timer = nullptr; m_blitter_busy = 0; @@ -31,7 +46,8 @@ epic12_device::epic12_device(const machine_config &mconfig, const char *tag, dev m_gfx_scroll_0_y_shadowcopy = 0; m_gfx_scroll_1_x_shadowcopy = 0; m_gfx_scroll_1_y_shadowcopy = 0; - blit_delay = 0; + m_blit_delay_ns = 0; + m_blit_idle_op_bytes = 0; } TIMER_CALLBACK_MEMBER(epic12_device::blitter_delay_callback) @@ -72,7 +88,6 @@ void epic12_device::device_start() save_item(NAME(m_gfx_scroll_0_y)); save_item(NAME(m_gfx_scroll_1_x)); save_item(NAME(m_gfx_scroll_1_y)); - save_item(NAME(m_delay_scale)); save_item(NAME(m_gfx_addr_shadowcopy)); save_item(NAME(m_gfx_scroll_0_x_shadowcopy)); save_item(NAME(m_gfx_scroll_0_y_shadowcopy)); @@ -84,20 +99,14 @@ void epic12_device::device_start() save_item(NAME(m_firmware_port)); save_item(NAME(m_firmware)); save_item(NAME(m_firmware_version)); + save_item(NAME(m_blit_delay_ns)); + save_item(NAME(m_blit_idle_op_bytes)); } void epic12_device::device_reset() { - if (m_is_unsafe) - { - m_use_ram = m_ram16; - m_work_queue = 
osd_work_queue_alloc(WORK_QUEUE_FLAG_HIGH_FREQ|WORK_QUEUE_FLAG_MULTI); - } - else - { - m_use_ram = m_ram16_copy.get(); // slow mode - m_work_queue = osd_work_queue_alloc(WORK_QUEUE_FLAG_HIGH_FREQ); - } + m_use_ram = m_ram16_copy.get(); + m_work_queue = osd_work_queue_alloc(WORK_QUEUE_FLAG_HIGH_FREQ); // cache table to avoid divides in blit code, also pre-clamped for (int y = 0; y < 0x40; y++) @@ -125,7 +134,6 @@ void epic12_device::device_reset() u8 epic12_device::colrtable[0x20][0x40]; u8 epic12_device::colrtable_rev[0x20][0x40]; u8 epic12_device::colrtable_add[0x20][0x20]; -u64 epic12_device::blit_delay; inline u16 epic12_device::READ_NEXT_WORD(offs_t *addr) { @@ -134,7 +142,6 @@ inline u16 epic12_device::READ_NEXT_WORD(offs_t *addr) *addr += 2; -// printf("data %04x\n", data); return data; } @@ -146,7 +153,6 @@ inline u16 epic12_device::COPY_NEXT_WORD(address_space &space, offs_t *addr) *addr += 2; -// printf("data %04x\n", data); return data; } @@ -187,6 +193,18 @@ inline void epic12_device::gfx_upload_shadow_copy(address_space &space, offs_t * COPY_NEXT_WORD(space, addr); } } + + // Time spent on uploads is mostly due to Main RAM accesses. + // The Blitter will send BREQ requests to the SH-3, to access Main RAM + // and then write it to VRAM. + // The number of bytes to read is the sum of a 16-byte fixed header and the pixel + // data (2 bytes per pixel). RAM accesses are 32-bit, so divide by four for clocks. + // + // TODO: There's additional overhead to these requests that is not included. The BREQ + // assertion also puts the CPU into WAIT, if it needs uncached RAM accesses. 
+ int num_sram_clk = (16 + dimx * dimy * 2 ) / 4; + m_blit_delay_ns += num_sram_clk * EP1C_SRAM_CLK_NANOSEC; + m_blit_idle_op_bytes = 0; } inline void epic12_device::gfx_upload(offs_t *addr) @@ -327,6 +345,31 @@ const epic12_device::blitfunction epic12_device::f1_ti0_tr0_blit_funcs[64] = epic12_device::draw_sprite_f1_ti0_tr0_s0_d7, epic12_device::draw_sprite_f1_ti0_tr0_s1_d7, epic12_device::draw_sprite_f1_ti0_tr0_s2_d7, epic12_device::draw_sprite_f1_ti0_tr0_s3_d7, epic12_device::draw_sprite_f1_ti0_tr0_s4_d7, epic12_device::draw_sprite_f1_ti0_tr0_s5_d7, epic12_device::draw_sprite_f1_ti0_tr0_s6_d7, epic12_device::draw_sprite_f1_ti0_tr0_s7_d7, }; +/* + Calculate number of VRAM row accesses a draw will perform. + Source data will typically be aligned well with VRAM, but this is not the case for the destination. + As an example, drawing a 64x32 pixel image will usually read from two VRAM rows for source data, + but if the destination start coordinate is (x=10, y=10), each of the 32x32px chunks of source data will + touch 4 rows of destination VRAM, leading to a total of 8 destination VRAM accesses. +*/ +inline u16 calculate_vram_accesses(u16 start_x, u16 start_y, u16 dimx, u16 dimy) +{ + int x_rows = 0; + int num_vram_rows = 0; + for (int x_pixels = dimx; x_pixels > 0; x_pixels -= 32) + { + x_rows++; + if (((start_x & 31) + std::min(32, x_pixels)) > 32) + x_rows++; // Drawing across multiple horizontal VRAM row boundaries. + } + for (int y_pixels = dimy; y_pixels > 0; y_pixels -= 32) + { + num_vram_rows += x_rows; + if (((start_y & 31) + std::min(32, y_pixels)) > 32) + num_vram_rows += x_rows; // Drawing across multiple vertical VRAM row boundaries. 
+ } + return num_vram_rows; +} /* Draw command @@ -367,19 +410,59 @@ inline void epic12_device::gfx_draw_shadow_copy(address_space &space, offs_t *ad { COPY_NEXT_WORD(space, addr); COPY_NEXT_WORD(space, addr); - COPY_NEXT_WORD(space, addr); - COPY_NEXT_WORD(space, addr); - COPY_NEXT_WORD(space, addr); // const u16 dst_x_start = COPY_NEXT_WORD(space, addr); - COPY_NEXT_WORD(space, addr); // const u16 dst_y_start = COPY_NEXT_WORD(space, addr); - const u16 w = COPY_NEXT_WORD(space, addr); - const u16 h = COPY_NEXT_WORD(space, addr); + u16 src_x_start = COPY_NEXT_WORD(space, addr); + u16 src_y_start = COPY_NEXT_WORD(space, addr); + u16 dst_x_start = COPY_NEXT_WORD(space, addr); + u16 dst_y_start = COPY_NEXT_WORD(space, addr); + u16 src_dimx = (COPY_NEXT_WORD(space, addr) & 0x1fff) + 1; + u16 src_dimy = (COPY_NEXT_WORD(space, addr) & 0x0fff) + 1; COPY_NEXT_WORD(space, addr); COPY_NEXT_WORD(space, addr); - // todo, calcualte clipping. - blit_delay += w * h; -} + // Calculate Blitter delay for the Draw operation. + // On real hardware, the Blitter will read operations into a FIFO queue + // by asserting BREQ on the SH3 and then reading from Main RAM. + // Since the reads are done concurrently with the execution of operations, it's + // OK to estimate the delay all at once for emulation purposes. + + u16 dst_x_end = dst_x_start + src_dimx - 1; + u16 dst_y_end = dst_y_start + src_dimy - 1; + + // Sprites fully outside of the clipping area should not be drawn. + if (dst_x_start > m_clip.max_x || dst_x_end < m_clip.min_x || dst_y_start > m_clip.max_y || dst_y_end < m_clip.min_y) + { + idle_blitter(EP1C_DRAW_OPERATION_SIZE_BYTES); + return; + } + m_blit_idle_op_bytes = 0; // Blitter no longer idle. + + // VRAM data is laid out in 32x32 pixel rows. Calculate the number of rows accessed. 
+ int src_num_vram_rows = calculate_vram_accesses(src_x_start, src_y_start, src_dimx, src_dimy); + int dst_num_vram_rows = calculate_vram_accesses(dst_x_start, dst_y_start, src_dimx, src_dimy); + + // Since draws are done 4 pixels at a time, extend the draw area to coordinates aligned for this. + // Doing this after the VRAM calculations simplifies things a bit, and these extensions will never make the + // destination area span additional VRAM rows. + dst_x_start -= dst_x_start & 3; + dst_x_end += (4 - ((dst_x_end + 1) & 3)) & 3; + u16 dst_dimx = dst_x_end - dst_x_start + 1; + u16 dst_dimy = dst_y_end - dst_y_start + 1; + + // Number of VRAM CLK cycles needed to draw a sprite is the sum of: + // - Number of pixels read from source divided by 4 (each CLK reads 4 pixels, since 32-bit DDR). + // - Number of pixels read from destination divided by 4. + // - Pixels written to destination divided by 4 (read + write together: destination pixels / 2). + // - VRAM access overhead: + // - 6 CLK of overhead after each read from a source VRAM row. + // - 20 CLK of overhead between read and write of each destination VRAM row. + // - 11 CLK of overhead after each write to a destination VRAM row. + // - 12 CLK of additional overhead per sprite at the end of writing. + // Note: Details are from https://buffis.com/docs/CV1000_Blitter_Research_by_buffi.pdf + // There may be mistakes. + u32 num_vram_clk = src_dimx * src_dimy / 4 + dst_dimx * dst_dimy / 2 + src_num_vram_rows * 6 + dst_num_vram_rows * (20 + 11) + 12; + m_blit_delay_ns += num_vram_clk * EP1C_VRAM_CLK_NANOSEC; +} inline void epic12_device::gfx_draw(offs_t *addr) { @@ -592,8 +675,11 @@ inline void epic12_device::gfx_draw(offs_t *addr) void epic12_device::gfx_create_shadow_copy(address_space &space) { offs_t addr = m_gfx_addr & 0x1fffffff; - m_clip.set(m_gfx_scroll_1_x_shadowcopy, m_gfx_scroll_1_x_shadowcopy + 320-1, m_gfx_scroll_1_y_shadowcopy, m_gfx_scroll_1_y_shadowcopy + 240-1); + // TODO: Stop respecting this clipping area in draw operations not fully outside of it. 
+ // For Muchi Muchi Pork, it looks like it draws 32px around visible area, when checking in Special Mode test menu VRAM viewer. + // Draws outside of clip area is also visible in logic analyzer for Pink Sweets. + m_clip.set(m_gfx_scroll_1_x_shadowcopy, m_gfx_scroll_1_x_shadowcopy + 320-1, m_gfx_scroll_1_y_shadowcopy, m_gfx_scroll_1_y_shadowcopy + 240-1); while (1) { // request commands from main CPU RAM @@ -610,6 +696,7 @@ void epic12_device::gfx_create_shadow_copy(address_space &space) m_clip.set(m_gfx_scroll_1_x_shadowcopy, m_gfx_scroll_1_x_shadowcopy + 320 - 1, m_gfx_scroll_1_y_shadowcopy, m_gfx_scroll_1_y_shadowcopy + 240 - 1); else m_clip.set(0, 0x2000 - 1, 0, 0x1000 - 1); + idle_blitter(EP1C_CLIP_OPERATION_SIZE_BYTES); break; case 0x2000: @@ -672,83 +759,17 @@ void epic12_device::gfx_exec(void) } } - -void epic12_device::gfx_exec_unsafe(void) -{ - offs_t addr = m_gfx_addr & 0x1fffffff; - m_clip.set(m_gfx_scroll_1_x, m_gfx_scroll_1_x + 320 - 1, m_gfx_scroll_1_y, m_gfx_scroll_1_y + 240 - 1); - -// logerror("GFX EXEC: %08X\n", addr); - - while (1) - { - // request commands from main CPU RAM - const u16 data = READ_NEXT_WORD(&addr); - - switch (data & 0xf000) - { - case 0x0000: - case 0xf000: - return; - - case 0xc000: - if (READ_NEXT_WORD(&addr)) // cliptype - m_clip.set(m_gfx_scroll_1_x, m_gfx_scroll_1_x + 320 - 1, m_gfx_scroll_1_y, m_gfx_scroll_1_y + 240 - 1); - else - m_clip.set(0, 0x2000 - 1, 0, 0x1000 - 1); - break; - - case 0x2000: - addr -= 2; - gfx_upload(&addr); - break; - - case 0x1000: - addr -= 2; - gfx_draw(&addr); - break; - - default: - popmessage("GFX op = %04X", data); - return; - } - } -} - - void *epic12_device::blit_request_callback(void *param, int threadid) { epic12_device *object = reinterpret_cast(param); - object->gfx_exec(); return nullptr; } -void *epic12_device::blit_request_callback_unsafe(void *param, int threadid) -{ - epic12_device *object = reinterpret_cast(param); - - blit_delay = 0; - object->gfx_exec_unsafe(); - return 
nullptr; -} - - u32 epic12_device::gfx_ready_r() { - return 0x00000010; -} - -u32 epic12_device::gfx_ready_r_unsafe() -{ - if (m_blitter_busy) - { - m_maincpu->spin_until_time(attotime::from_usec(10)); - return 0x00000000; - } - else - return 0x00000010; + return m_blitter_busy ? 0x00000000 : 0x00000010; } void epic12_device::gfx_exec_w(address_space &space, offs_t offset, u32 data, u32 mem_mask) @@ -757,7 +778,6 @@ void epic12_device::gfx_exec_w(address_space &space, offs_t offset, u32 data, u3 { if (data & 1) { - //auto profile = g_profiler.start(PROFILER_USER1); // make sure we've not already got a request running if (m_blitter_request) { @@ -769,79 +789,48 @@ void epic12_device::gfx_exec_w(address_space &space, offs_t offset, u32 data, u3 osd_work_item_release(m_blitter_request); } - blit_delay = 0; - gfx_create_shadow_copy(space); // create a copy of the blit list so we can safely thread it. - - if (blit_delay) - { - m_blitter_busy = 1; - m_blitter_delay_timer->adjust(attotime::from_nsec(blit_delay*8)); // NOT accurate timing (currently ignored anyway) - } - - m_gfx_addr_shadowcopy = m_gfx_addr; m_gfx_scroll_0_x_shadowcopy = m_gfx_scroll_0_x; m_gfx_scroll_0_y_shadowcopy = m_gfx_scroll_0_y; m_gfx_scroll_1_x_shadowcopy = m_gfx_scroll_1_x; m_gfx_scroll_1_y_shadowcopy = m_gfx_scroll_1_y; - m_blitter_request = osd_work_item_queue(m_work_queue, blit_request_callback, (void*)this, 0); - } - } -} + // Create a copy of the blit list so we can safely thread it. + // Copying the Blitter operations will also estimate the delay needed for processing. 
+ m_blit_delay_ns = 0; + gfx_create_shadow_copy(space); -void epic12_device::gfx_exec_w_unsafe(offs_t offset, u32 data, u32 mem_mask) -{ - if (ACCESSING_BITS_0_7) - { - if (data & 1) - { - //auto profile = g_profiler.start(PROFILER_USER1); - // make sure we've not already got a request running - if (m_blitter_request) - { - int result; - do - { - result = osd_work_item_wait(m_blitter_request, 1000); - } while (result == 0); - osd_work_item_release(m_blitter_request); - } + // Every EP1C_VRAM_H_LINE_PERIOD_NANOSEC, the Blitter will block other operations, due + // to fetching a horizontal line from VRAM for output. + m_blit_delay_ns += std::floor( m_blit_delay_ns / EP1C_VRAM_H_LINE_PERIOD_NANOSEC ) * EP1C_VRAM_H_LINE_DURATION_NANOSEC; - if (blit_delay) - { - m_blitter_busy = 1; - int delay = blit_delay*(15 * m_delay_scale / 50); - //printf("delay %d\n", delay); - m_blitter_delay_timer->adjust(attotime::from_nsec(delay)); - } - else - { - m_blitter_busy = 0; - } + // Check if Blitter takes longer than a frame to render. + // In practice, there's a bit less time than this to allow for lack of slowdown but + // for debugging purposes this is an ok approximation. + if (m_blit_delay_ns > EP1C_FRAME_DURATION_NANOSEC) + LOGDBG("Blitter delay! 
Blit duration %lld ns.\n", m_blit_delay_ns); + + m_blitter_busy = 1; + m_blitter_delay_timer->adjust(attotime::from_nsec(m_blit_delay_ns)); - m_blitter_request = osd_work_item_queue(m_work_queue, blit_request_callback_unsafe, (void*)this, 0); + m_gfx_addr_shadowcopy = m_gfx_addr; + m_blitter_request = osd_work_item_queue(m_work_queue, blit_request_callback, (void*)this, 0); } } } - void epic12_device::draw_screen(bitmap_rgb32 &bitmap, const rectangle &cliprect) { - if (!m_is_unsafe) + if (m_blitter_request) { - if (m_blitter_request) + int result; + do { - int result; - do - { - result = osd_work_item_wait(m_blitter_request, 1000); - } while (result == 0); - osd_work_item_release(m_blitter_request); - } + result = osd_work_item_wait(m_blitter_request, 1000); + } while (result == 0); + osd_work_item_release(m_blitter_request); } int scroll_0_x, scroll_0_y; -// int scroll_1_x, scroll_1_y; bitmap.fill(0, cliprect); @@ -867,10 +856,7 @@ void epic12_device::draw_screen(bitmap_rgb32 &bitmap, const rectangle &cliprect) scroll_0_x = -m_gfx_scroll_0_x; scroll_0_y = -m_gfx_scroll_0_y; -// scroll_1_x = -m_gfx_scroll_1_x; -// scroll_1_y = -m_gfx_scroll_1_y; - //printf("SCREEN UPDATE\n %d %d %d %d\n", scroll_0_x, scroll_0_y, scroll_1_x, scroll_1_y); #if DEBUG_VRAM_VIEWER if (m_debug_vram_view_en) @@ -905,31 +891,6 @@ u32 epic12_device::blitter_r(offs_t offset, u32 mem_mask) return 0; } -u32 epic12_device::blitter_r_unsafe(offs_t offset, u32 mem_mask) -{ - switch (offset * 4) - { - case 0x10: - return gfx_ready_r_unsafe(); - - case 0x24: - return 0xffffffff; - - case 0x28: - return 0xffffffff; - - case 0x50: - return m_port_r_cb(); - - default: - logerror("unknownblitter_r %08x %08x\n", offset*4, mem_mask); - break; - - } - return 0; -} - - void epic12_device::blitter_w(address_space &space, offs_t offset, u32 data, u32 mem_mask) { switch (offset * 4) @@ -950,35 +911,12 @@ void epic12_device::blitter_w(address_space &space, offs_t offset, u32 data, u32 
COMBINE_DATA(&m_gfx_scroll_0_y); break; - case 0x40: - COMBINE_DATA(&m_gfx_scroll_1_x); - break; - - case 0x44: - COMBINE_DATA(&m_gfx_scroll_1_y); - break; - - } -} - -void epic12_device::blitter_w_unsafe(address_space &space, offs_t offset, u32 data, u32 mem_mask) -{ - switch (offset * 4) - { - case 0x04: - gfx_exec_w_unsafe(offset, data, mem_mask); - break; - - case 0x08: - COMBINE_DATA(&m_gfx_addr); - break; - - case 0x14: - COMBINE_DATA(&m_gfx_scroll_0_x); - break; - - case 0x18: - COMBINE_DATA(&m_gfx_scroll_0_y); + case 0x24: // Some sort of handshake at start of IRQ's. + case 0x28: // Related to coins entered. + case 0x30: // Contrast (test menu). + case 0x34: // Brightness (test menu). + case 0x38: // V offset (test menu). + case 0x3c: // H offset (test menu). break; case 0x40: @@ -999,17 +937,8 @@ void epic12_device::install_handlers(int addr1, int addr2) read32s_delegate read(*this); write32_delegate write(*this); - if (m_is_unsafe) - { - printf("using unsafe blit code!\n"); - read = read32s_delegate(*this, FUNC(epic12_device::blitter_r_unsafe)); - write = write32_delegate(*this, FUNC(epic12_device::blitter_w_unsafe)); - } - else - { - read = read32s_delegate(*this, FUNC(epic12_device::blitter_r)); - write = write32_delegate(*this, FUNC(epic12_device::blitter_w)); - } + read = read32s_delegate(*this, FUNC(epic12_device::blitter_r)); + write = write32_delegate(*this, FUNC(epic12_device::blitter_w)); space.install_read_handler(addr1, addr2, std::move(read), 0xffffffffffffffffU); space.install_write_handler(addr1, addr2, std::move(write), 0xffffffffffffffffU); @@ -1028,28 +957,31 @@ void epic12_device::fpga_w(offs_t offset, u64 data, u64 mem_mask) // data & 0x10 = CLK // data & 0x20 = DATA - if((data & 0x08) && !(m_firmware_port & 0x10) && (data & 0x10)) { - if(m_firmware_pos < 2323240 && (data & 0x20)) + if ((data & 0x08) && !(m_firmware_port & 0x10) && (data & 0x10)) + { + if (m_firmware_pos < 2323240 && (data & 0x20)) m_firmware[m_firmware_pos >> 3] |= 1 
<< (m_firmware_pos & 7); m_firmware_pos++; } m_firmware_port = data; - if(m_firmware_pos == 2323240) { + if (m_firmware_pos == 2323240) + { u8 checksum = 0; for(u8 c : m_firmware) checksum += c; - switch(checksum) { - case 0x03: m_firmware_version = FW_A; break; - case 0x3e: m_firmware_version = FW_B; break; - case 0xf9: m_firmware_version = FW_C; break; - case 0xe1: m_firmware_version = FW_D; break; - default: m_firmware_version = -1; break; + switch (checksum) + { + case 0x03: m_firmware_version = FW_A; break; + case 0x3e: m_firmware_version = FW_B; break; + case 0xf9: m_firmware_version = FW_C; break; + case 0xe1: m_firmware_version = FW_D; break; + default: m_firmware_version = -1; break; } - if(m_firmware_version < 0) + if (m_firmware_version < 0) logerror("Unrecognized firmware version\n"); else logerror("Detected firmware version %c\n", 'A' + m_firmware_version); diff --git a/src/devices/video/epic12.h b/src/devices/video/epic12.h index 82e58d07e9beb..0b49040891e96 100644 --- a/src/devices/video/epic12.h +++ b/src/devices/video/epic12.h @@ -16,8 +16,6 @@ class epic12_device : public device_t, public device_video_interface template void set_cpu(T &&maintag) { m_maincpu.set_tag(std::forward(maintag)); } auto port_r_callback() { return m_port_r_cb.bind(); } void set_rambase(u16* rambase) { m_ram16 = rambase; } - void set_delay_scale(int delay_scale) { m_delay_scale = delay_scale; } - void set_is_unsafe(int is_unsafe) { m_is_unsafe = is_unsafe; } inline u16 READ_NEXT_WORD(offs_t *addr); @@ -47,12 +45,8 @@ class epic12_device : public device_t, public device_video_interface size_t m_main_ramsize; // type D has double the main ram size_t m_main_rammask; - int m_is_unsafe; - int m_delay_scale; - void install_handlers(int addr1, int addr2); - // thread safe mode, with no delays & shadow ram copy u32 blitter_r(offs_t offset, u32 mem_mask = ~0); void blitter_w(address_space &space, offs_t offset, u32 data, u32 mem_mask = ~0); u32 m_gfx_addr_shadowcopy; @@ -69,15 
+63,15 @@ class epic12_device : public device_t, public device_video_interface u32 gfx_ready_r(); void gfx_exec_w(address_space &space, offs_t offset, u32 data, u32 mem_mask = ~0); - // for thread unsafe mode with blitter delays, no shadow copy of RAM - u32 blitter_r_unsafe(offs_t offset, u32 mem_mask = ~0); - void blitter_w_unsafe(address_space &space, offs_t offset, u32 data, u32 mem_mask = ~0); - u32 gfx_ready_r_unsafe(); - void gfx_exec_w_unsafe(offs_t offset, u32 data, u32 mem_mask = ~0); - void gfx_exec_unsafe(void); - static void *blit_request_callback_unsafe(void *param, int threadid); - protected: + // Number of bytes that are read each time Blitter fetches operations from SRAM. + static inline constexpr int OPERATION_CHUNK_SIZE_BYTES = 64; + + // Approximate time it takes to fetch a chunk of operations. + // This is composed of the time that the Blitter holds the Bus Request (BREQ) signal + // of the SH-3, as well as the overhead between requests. + static inline constexpr int OPERATION_READ_CHUNK_INTERVAL_NS = 700; + // The firmware versions enum { // Used by ibara & mushisama @@ -827,6 +821,27 @@ class epic12_device : public device_t, public device_video_interface virtual void device_start() override; virtual void device_reset() override; + // Called when a Blitter operation does not cause any draws/uploads to be performed. + // If multiple draws in a row are performed outside of an active clipping area, + // the Blitter will be reading operations from SRAM in 64 byte chunks, but not + // actually performing any work. + // This will still be visible from the CPU as Blitter being busy, until the + // operation list has exited. + // + // TODO: Having 64 bytes of non-drawing operations in a row will only cause the Blitter + // to idle if the operations are read from the same 64 byte chunk (and not split between two). 
+ // A more proper handling of this would be to change the reads from SRAM to be done 64 bytes at a time + // into a FIFO, but that's a fair amount of work. + void idle_blitter(u8 operation_size_bytes) + { + m_blit_idle_op_bytes += operation_size_bytes; + if (m_blit_idle_op_bytes >= OPERATION_CHUNK_SIZE_BYTES) + { + m_blit_idle_op_bytes -= OPERATION_CHUNK_SIZE_BYTES; + m_blit_delay_ns += OPERATION_READ_CHUNK_INTERVAL_NS; + } + } + TIMER_CALLBACK_MEMBER(blitter_delay_callback); osd_work_queue *m_work_queue; @@ -835,6 +850,8 @@ class epic12_device : public device_t, public device_video_interface // blit timing emu_timer *m_blitter_delay_timer; int m_blitter_busy; + u64 m_blit_delay_ns; + u16 m_blit_idle_op_bytes; // fpga firmware std::vector m_firmware; @@ -856,7 +873,6 @@ class epic12_device : public device_t, public device_video_interface static u8 colrtable[0x20][0x40]; static u8 colrtable_rev[0x20][0x40]; static u8 colrtable_add[0x20][0x20]; - static u64 blit_delay; static const blitfunction f0_ti1_tr1_blit_funcs[64]; static const blitfunction f0_ti1_tr0_blit_funcs[64]; diff --git a/src/devices/video/epic12in.hxx b/src/devices/video/epic12in.hxx index c43e24cf480a4..23d6a07f65e63 100644 --- a/src/devices/video/epic12in.hxx +++ b/src/devices/video/epic12in.hxx @@ -81,14 +81,6 @@ void epic12_device::FUNCNAME(BLIT_PARAMS) if (dst_x_end > clip->max_x) dimx -= (dst_x_end-1) - clip->max_x; -// wrong/unsafe slowdown sim - if (dimy > starty && dimx > startx) - { - blit_delay += (dimy - starty) * (dimx - startx); - - //printf("delay is now %d\n", blit_delay); - } - #if BLENDED == 1 #if _SMODE == 0 #if _DMODE == 0 diff --git a/src/mame/misc/cv1k.cpp b/src/mame/misc/cv1k.cpp index ebd0da75fc1d4..57fc9fe1cd26a 100644 --- a/src/mame/misc/cv1k.cpp +++ b/src/mame/misc/cv1k.cpp @@ -94,7 +94,7 @@ PCB CV1000-B / CV1000-D CPU: Hitachi 6417709S SH3 clocked at 102.4MHz (12.800MHz * 8) Sound: Yamaha YMZ770C-F clocked at 16.384MHz -Other: Altera 
Cyclone EP1C12 FPGA Altera EPM7032 (MAX 7000 Series) at U13 OSC: @@ -103,8 +103,8 @@ Other: Altera Cyclone EPIC12 FPGA X3 16.384MHz (Yamaha YMZ770C-F clock) Memory: - U6 (SDRAM) MT46V16M16 ? 4 MBit x 16 x 4 banks, RAM (256 MBit) - U7 (SDRAM) MT46V16M16 ? 4 MBit x 16 x 4 banks, RAM (256 MBit) + U6 (SDRAM) MT46V16M16 ? 4 MBit x 16 x 4 banks, DDR RAM (256 MBit) + U7 (SDRAM) MT46V16M16 ? 4 MBit x 16 x 4 banks, DDR RAM (256 MBit) U1 (SDRAM) MT48LC2M32 ? 512K x 32 x 4 banks, (64 MBit) for CV1000-B U1 (SDRAM) IS42S32400 - 1024K x 32 x 4 banks, (128 MBit) for CV1000-D @@ -138,13 +138,18 @@ Dipswitches & Push Buttons: D1-D6 (LED) Status LED's. D6 lights up at power on then shuts off, D2 indicates coinage. Note: * The Altera EPM7032 usually stamped / labeled with the Cave game ID number as listed above. - * Actual flash ROMs will vary by manufacturer but will be compatible with flash ROM listed. + * U4, U23, U24 flash ROMs will vary by manufacturer but will be compatible with flash ROM listed. + * The game logic does a manufacturer check on U2. K9F1G08U0M and two other device codes are allowed. + Trying to replace U2 with a flash ROM from another manufacturer does not work. Allowed devices are: + - Manufacturer ID: 0x98, ID code: 0x76 + - Manufacturer ID: 0x98, ID code: 0x79 + - Manufacturer ID: 0xEC, ID code: 0xF1 <- This is K9F1G08U0M * There are two known CV1000-B PCB revisions. The newer one has some minor hardware differences and uses an updated FPGA firmware, they are not compatible with eachother. * The CV1000-D revision PCB has double the RAM at U1, double the ROM at U4 and no battery. The CV1000-D is used for Dodonpachi Daifukkatsu and later games. Commonly referred to as SH3B PCB. 
-Information by The Sheep, rtw, Ex-Cyber, BrianT & Guru +Information by The Sheep, rtw, Ex-Cyber, BrianT, Guru & buffi ------------------------------------------------------ @@ -173,10 +178,10 @@ Remaining Video issues - mmpork startup screen flicker - the FOR USE IN JAPAN screen doesn't appear on the real PCB until after the graphics are fully loaded, it still displays 'please wait' until that point. - is the use of the 'scroll' registers 100% correct? (related to above?) - Sometimes the 'sprites' in mushisam lag by a frame vs the 'backgrounds' is this a timing problem, does the real game do it? + - End of Blit should send IRQ1. (one game has a valid irq routine that looks like it was used for profiling, but nothing depends on it) -Blitter Timing - - Correct slowdown emulation and flags (depends on blit mode, and speed of RAM) - could do with the recompiler or alt idle skips on the busy flag wait loops - - End of Blit IRQ? (one game has a valid irq routine that looks like it was used for profiling, but nothing depends on it) +Timing + - Correct CPU slowdown emulation and flags (and speed of RAM). Most slowdown seems due to SH-3 uncached RAM access wait states, which is not implemented. 
31/12/2021: Akai Katana and Dodonpachi Saidaioujou removed at the request of the @@ -210,7 +215,6 @@ class cv1k_state : public driver_device m_eeprom(*this, "eeprom"), m_ram(*this, "mainram"), m_rombase(*this, "maincpu"), - m_blitrate(*this, "BLITRATE"), m_eepromout(*this, "EEPROMOUT"), m_idleramoffs(0), m_idlepc(0) @@ -247,7 +251,6 @@ class cv1k_state : public driver_device uint32_t screen_update(screen_device &screen, bitmap_rgb32 &bitmap, const rectangle &cliprect); - required_ioport m_blitrate; required_ioport m_eepromout; uint32_t m_idleramoffs; @@ -266,8 +269,6 @@ class cv1k_state : public driver_device uint32_t cv1k_state::screen_update(screen_device &screen, bitmap_rgb32 &bitmap, const rectangle &cliprect) { - m_blitter->set_delay_scale(m_blitrate->read()); - m_blitter->draw_screen(bitmap,cliprect); return 0; } @@ -436,14 +437,6 @@ static INPUT_PORTS_START( cv1k_base ) PORT_BIT( 0x01, IP_ACTIVE_HIGH, IPT_OUTPUT ) PORT_WRITE_LINE_DEVICE_MEMBER("eeprom", rtc9701_device, write_bit) PORT_BIT( 0x02, IP_ACTIVE_HIGH, IPT_OUTPUT ) PORT_WRITE_LINE_DEVICE_MEMBER("eeprom", rtc9701_device, set_clock_line) PORT_BIT( 0x04, IP_ACTIVE_LOW, IPT_OUTPUT ) PORT_WRITE_LINE_DEVICE_MEMBER("eeprom", rtc9701_device, set_cs_line) - - PORT_START("BLITCFG") // the Blitter Delay code isn't threadsafe, DO NOT turn on by default - PORT_CONFNAME( 0x0001, 0x0000, "Use (unsafe) Blitter Delay (requires reset)" ) - PORT_CONFSETTING( 0x0000, DEF_STR( No ) ) - PORT_CONFSETTING( 0x0001, DEF_STR( Yes ) ) - - PORT_START("BLITRATE") - PORT_ADJUSTER(50, "Blitter Delay") INPUT_PORTS_END static INPUT_PORTS_START( cv1k ) @@ -466,7 +459,6 @@ INPUT_PORTS_END void cv1k_state::machine_reset() { m_blitter->set_rambase(reinterpret_cast(m_ram.target())); - m_blitter->set_is_unsafe(ioport("BLITCFG")->read()); m_blitter->install_handlers( 0x18000000, 0x18000057 ); m_blitter->reset(); }