diff --git a/src/devices/video/poly.h b/src/devices/video/poly.h index 8fa634283219d..9f1243e2a7b09 100644 --- a/src/devices/video/poly.h +++ b/src/devices/video/poly.h @@ -280,7 +280,6 @@ class poly_manager { BaseType start; // parameter value at start BaseType dpdx; // dp/dx relative to start - BaseType dpdy; // dp/dy relative to start }; int16_t startx, stopx; // starting (inclusive)/ending (exclusive) endpoints std::array param; // array of parameter start/delays @@ -795,7 +794,6 @@ uint32_t poly_manager::render_tile(recta { extent.param[paramnum].start = v1->p[paramnum] + fullstartx * param_dpdx[paramnum] + fully * param_dpdy[paramnum]; extent.param[paramnum].dpdx = param_dpdx[paramnum]; - extent.param[paramnum].dpdy = param_dpdy[paramnum]; } } } @@ -960,7 +958,6 @@ uint32_t poly_manager::render_triangle(c { extent.param[paramnum].start = param_start[paramnum] + fullstartx * param_dpdx[paramnum] + fully * param_dpdy[paramnum]; extent.param[paramnum].dpdx = param_dpdx[paramnum]; - extent.param[paramnum].dpdy = param_dpdy[paramnum]; } } } @@ -1077,7 +1074,6 @@ uint32_t poly_manager::render_extents(re { extent.param[paramnum].start = srcextent.param[paramnum].start; extent.param[paramnum].dpdx = srcextent.param[paramnum].dpdx; - extent.param[paramnum].dpdy = srcextent.param[paramnum].dpdy; } extent.userdata = srcextent.userdata; @@ -1260,7 +1256,6 @@ uint32_t poly_manager::render_polygon(re extent.param[paramnum].start = lparam;// - (BaseType(istartx) + 0.5f) * dpdx; extent.param[paramnum].dpdx = dpdx; - extent.param[paramnum].dpdy = ledge->dpdy[paramnum]; } } diff --git a/src/mame/sega/model2.cpp b/src/mame/sega/model2.cpp index 9be7e70bfd909..4db117368346b 100644 --- a/src/mame/sega/model2.cpp +++ b/src/mame/sega/model2.cpp @@ -10,7 +10,6 @@ MAME driver by R. Belmont, Olivier Galibert, ElSemi, Angelo Salese and Matthew Daniels. TODO: - - Mip Mapping still needs to be properly sorted in the renderer; - outputs and artwork (for gearbox indicators); - clean-ups; @@ -1028,14 +1027,14 @@ void model2_tgp_state::tex1_w(offs_t offset, u32 data) } } -u16 model2_state::lumaram_r(offs_t offset) +u8 model2_state::lumaram_r(offs_t offset) { return m_lumaram[offset]; } -void model2_state::lumaram_w(offs_t offset, u16 data, u16 mem_mask) +void model2_state::lumaram_w(offs_t offset, u8 data) { - COMBINE_DATA(&m_lumaram[offset]); + m_lumaram[offset] = data; } /* Top Skater reads here and discards the result */ @@ -1107,8 +1106,6 @@ void model2_state::model2_base_mem(address_map &map) // format is xGGGGGRRRRRBBBBB (512x400) map(0x11600000, 0x1167ffff).rw(FUNC(model2_state::fbvram_bankA_r), FUNC(model2_state::fbvram_bankA_w)).flags(i960_cpu_device::BURST); // framebuffer A (last bronx title screen) map(0x11680000, 0x116fffff).rw(FUNC(model2_state::fbvram_bankB_r), FUNC(model2_state::fbvram_bankB_w)).flags(i960_cpu_device::BURST); // framebuffer B - - map(0x12800000, 0x1281ffff).rw(FUNC(model2_state::lumaram_r), FUNC(model2_state::lumaram_w)).umask32(0x0000ffff).flags(i960_cpu_device::BURST); // polygon "luma" RAM } /* common map for 5881 protection */ @@ -1244,8 +1241,11 @@ void model2_tgp_state::model2_tgp_mem(address_map &map) map(0x00980000, 0x00980003).rw(FUNC(model2_tgp_state::copro_ctl1_r), FUNC(model2_tgp_state::copro_ctl1_w)); map(0x00980008, 0x0098000b).w(FUNC(model2_tgp_state::geo_ctl1_w)); + map(0x10800000, 0x10800003).nopr(); // polygon count register + map(0x12000000, 0x121fffff).ram().w(FUNC(model2o_state::tex0_w)).mirror(0x200000).share("textureram0").flags(i960_cpu_device::BURST); // texture RAM 0 map(0x12400000, 0x125fffff).ram().w(FUNC(model2o_state::tex1_w)).mirror(0x200000).share("textureram1").flags(i960_cpu_device::BURST); // texture RAM 1 + map(0x12800000, 0x1281ffff).rw(FUNC(model2_tgp_state::lumaram_r), FUNC(model2_tgp_state::lumaram_w)).umask32(0x000000ff).flags(i960_cpu_device::BURST); // polygon "luma" RAM } /* original Model 2 overrides */ @@ -1390,6 +1390,7 @@ void model2b_state::model2b_crx_mem(address_map &map) map(0x11300000, 0x113fffff).ram().share("textureram1").flags(i960_cpu_device::BURST); // texture RAM 1 (2b/2c) map(0x11400000, 0x1140ffff).rw(FUNC(model2b_state::lumaram_r), FUNC(model2b_state::lumaram_w)).flags(i960_cpu_device::BURST); // polygon "luma" RAM (2b/2c) map(0x12800000, 0x1281ffff).rw(FUNC(model2b_state::lumaram_r), FUNC(model2b_state::lumaram_w)).umask32(0x0000ffff).flags(i960_cpu_device::BURST); // polygon "luma" RAM + map(0x11400000, 0x1140ffff).rw(FUNC(model2b_state::lumaram_r), FUNC(model2b_state::lumaram_w)).umask16(0x00ff).flags(i960_cpu_device::BURST); // polygon "luma" RAM (2b/2c) map(0x01c00000, 0x01c0001f).rw("io", FUNC(sega_315_5649_device::read), FUNC(sega_315_5649_device::write)).umask32(0x00ff00ff); map(0x01c00040, 0x01c00043).nopw(); @@ -1426,6 +1427,7 @@ void model2c_state::model2c_crx_mem(address_map &map) map(0x11200000, 0x113fffff).ram().share("textureram1").flags(i960_cpu_device::BURST); // texture RAM 1 (2b/2c) map(0x11400000, 0x1140ffff).rw(FUNC(model2c_state::lumaram_r), FUNC(model2c_state::lumaram_w)).flags(i960_cpu_device::BURST); // polygon "luma" RAM (2b/2c) map(0x12800000, 0x1281ffff).rw(FUNC(model2c_state::lumaram_r), FUNC(model2c_state::lumaram_w)).umask32(0x0000ffff).flags(i960_cpu_device::BURST); // polygon "luma" RAM + map(0x11400000, 0x1140ffff).rw(FUNC(model2c_state::lumaram_r), FUNC(model2c_state::lumaram_w)).umask16(0x00ff).flags(i960_cpu_device::BURST); // polygon "luma" RAM (2b/2c) map(0x01c00000, 0x01c0001f).rw("io", FUNC(sega_315_5649_device::read), FUNC(sega_315_5649_device::write)).umask32(0x00ff00ff); map(0x01c80000, 0x01c80001).rw(FUNC(model2c_state::model2_serial_r), FUNC(model2c_state::model2_serial_w)).umask16(0x00ff); diff --git a/src/mame/sega/model2.h b/src/mame/sega/model2.h index 964de0035acee..58dcb39c4f6e7 100644 --- a/src/mame/sega/model2.h +++ b/src/mame/sega/model2.h @@ -80,7 +80,7 @@ class model2_state : public driver_device required_shared_ptr m_textureram1; std::unique_ptr m_palram; std::unique_ptr m_colorxlat; - std::unique_ptr m_lumaram; + std::unique_ptr m_lumaram; u8 m_gamma_table[256]{}; std::unique_ptr m_poly; @@ -204,8 +204,8 @@ class model2_state : public driver_device void geo_init(memory_region *polygon_rom); u32 render_mode_r(); void render_mode_w(u32 data); - u16 lumaram_r(offs_t offset); - void lumaram_w(offs_t offset, u16 data, u16 mem_mask = ~0); + u8 lumaram_r(offs_t offset); + void lumaram_w(offs_t offset, u8 data); u16 fbvram_bankA_r(offs_t offset); void fbvram_bankA_w(offs_t offset, u16 data, u16 mem_mask = ~0); u16 fbvram_bankB_r(offs_t offset); @@ -618,13 +618,20 @@ struct m2_poly_extra_data u32 lumabase; u32 colorbase; u8 checker; - u32 * texsheet[6]; - u32 texwidth[6]; - u32 texheight[6]; - u32 texx[6]; - u32 texy[6]; + u32 * texsheet[2]; + u32 texwidth; + u32 texheight; + u32 texx; + u32 texy; + u8 texwrapx; + u8 texwrapy; u8 texmirrorx; u8 texmirrory; + u8 utex; + u8 utexminlod; + u32 utexx; + u32 utexy; + s32 texlod; u8 luma; }; @@ -689,7 +696,7 @@ class model2_renderer : public poly_manager int16_t m_xoffs = 0, m_yoffs = 0; template - u32 fetch_bilinear_texel(const m2_poly_extra_data& object, const u32 miplevel, const float fu, const float fv); + u32 fetch_bilinear_texel(const m2_poly_extra_data& object, const s32 miplevel, s32 fu, s32 fv); }; typedef model2_renderer::vertex_t poly_vertex; @@ -733,6 +740,7 @@ struct model2_state::triangle u16 z = 0; u16 texheader[4] = { 0, 0, 0, 0 }; u8 luma = 0; + s32 texlod = 0; int16_t viewport[4] = { 0, 0, 0, 0 }; int16_t center[2] = { 0, 0 }; u8 window = 0; @@ -750,6 +758,7 @@ struct model2_state::quad_m2 u16 z = 0; u16 texheader[4] = { 0, 0, 0, 0 }; u8 luma = 0; + s32 texlod = 0; }; /******************************************* @@ -789,7 +798,7 @@ struct model2_state::raster_state u16 min_z = 0; // Minimum sortable Z value u16 max_z = 0; // Maximum sortable Z value u16 texture_ram[0x10000]; // Texture RAM pointer - u8 log_ram[0x40000]; // Log RAM pointer + u8 log_ram[0x8000]; // Log RAM pointer u8 cur_window = 0; // Current window plane clip_plane[4][4]; // Polygon clipping planes }; diff --git a/src/mame/sega/model2_v.cpp b/src/mame/sega/model2_v.cpp index cbc04b6d509ac..f975a6c952179 100644 --- a/src/mame/sega/model2_v.cpp +++ b/src/mame/sega/model2_v.cpp @@ -76,15 +76,12 @@ - Texturing code could use a real good speed optimization. - The U and V coordinates provided by the game are in 13.3 fixed point format. - - The luma/texel combination algorithm is not known. There are currently some small color glitches here and - there, and this might be the culprit. - The log tables and distance coefficients are used to calculate the number of texels per world unit that need to be used to render a texture. Textures can also be provided with smaller levels of details and a LOD bit selector in the texture header tells the rasterizer which texture map to use. The rasterizer then can average two texture maps to do mip mapping. More information can be found on the 2B manual, on the 'Texturing' and 'Data Format' chapters. - This is currently unemulated. We always use the texture data from the bigger texture map. - - The rasterizer supports up to 128x128 'microtex' textures, which are supposed to be higher resolution textures used - to display more detail when a texture is real close to the viewer. This is currently unemulated. + - The rasterizer supports 128x128 'microtextures' which are typically used to add more details to a texture when it is + close enough to the viewer. *********************************************************************************************************************************/ @@ -230,8 +227,8 @@ static int32_t clip_polygon(poly_vertex *v, int32_t num_vertices, poly_vertex *v out[outcount].x = cur->x + ((v[nextvert].x - cur->x) * scale); out[outcount].y = cur->y + ((v[nextvert].y - cur->y) * scale); out[outcount].pz = cur->pz + ((v[nextvert].pz - cur->pz) * scale); - out[outcount].pu = (u16)((float)cur->pu + (((float)v[nextvert].pu - (float)cur->pu) * scale)); - out[outcount].pv = (u16)((float)cur->pv + (((float)v[nextvert].pv - (float)cur->pv) * scale)); + out[outcount].pu = cur->pu + ((v[nextvert].pu - cur->pu) * scale); + out[outcount].pv = cur->pv + ((v[nextvert].pv - cur->pv) * scale); outcount++; } @@ -412,6 +409,10 @@ void model2_state::model2_3d_process_quad( raster_state *raster, u32 attr ) /* set the luma value of this quad */ object.luma = (raster->command_buffer[9] >> 15) & 0xff; + /* set the texture LOD of this quad */ + object.texlod = ((raster->command_buffer[10] >> 8) & 0x7f80) - 0x3f80; + object.texlod += raster->log_ram[raster->command_buffer[10] & 0x7fff]; + /* determine whether we can cull this quad */ cull = check_culling(raster,attr,min_z,max_z); @@ -488,6 +489,7 @@ void model2_state::model2_3d_process_quad( raster_state *raster, u32 attr ) tri->texheader[2] = object.texheader[2]; tri->texheader[3] = object.texheader[3]; tri->luma = object.luma; + tri->texlod = object.texlod; /* set the viewport */ tri->viewport[0] = raster->viewport[0]; @@ -638,6 +640,10 @@ void model2_state::model2_3d_process_triangle( raster_state *raster, u32 attr ) /* set the luma value of this triangle */ object.luma = (raster->command_buffer[9] >> 15) & 0xff; + /* set the texture LOD of this triangle */ + object.texlod = ((raster->command_buffer[10] >> 8) & 0x7f80) - 0x3f80; + object.texlod += raster->log_ram[raster->command_buffer[10] & 0x7fff]; + /* determine whether we can cull this triangle */ cull = check_culling(raster,attr,min_z,max_z); @@ -715,6 +721,7 @@ void model2_state::model2_3d_process_triangle( raster_state *raster, u32 attr ) tri->texheader[2] = object.texheader[2]; tri->texheader[3] = object.texheader[3]; tri->luma = object.luma; + tri->texlod = object.texlod; /* set the viewport */ tri->viewport[0] = raster->viewport[0]; @@ -806,43 +813,29 @@ void model2_renderer::model2_3d_render(triangle *tri, const rectangle &cliprect) extra.lumabase = (tri->texheader[1] & 0xff) << 7; extra.colorbase = (tri->texheader[3] >> 6) & 0x3ff; extra.luma = tri->luma; + extra.texlod = tri->texlod; if (renderer & 2) { extra.texmirrorx = (tri->texheader[0] >> 8) & 1; extra.texmirrory = (tri->texheader[0] >> 9) & 1; - u32* sheet = (tri->texheader[2] & 0x1000) ? m_state.m_textureram1 : m_state.m_textureram0; - u32 width = 32 << ((tri->texheader[0] >> 0) & 0x7); - u32 height = 32 << ((tri->texheader[0] >> 3) & 0x7); - u32 posx = 32 * ( (tri->texheader[2] >> 0) & 0x3f ); - u32 posy = 32 * ( (tri->texheader[2] >> 6) & 0x1f ); - - // 6 mips levels - // each mip level has half width and half height of the level above - // mips are located recursively in the bottom right corner of 2048x1024 - // each level has flipped ram banks compared to the level above - for (u32 mip = 0; mip < 6; mip++) - { - extra.texsheet[mip] = sheet; - extra.texwidth[mip] = width; - extra.texheight[mip] = height; - extra.texx[mip] = posx; - extra.texy[mip] = posy; - - width /= 2; - height /= 2; - posx = 2048 - (2048 - posx) / 2; - posy = 1024 - (1024 - posy) / 2; - if (sheet == m_state.m_textureram0) - { - sheet = m_state.m_textureram1; - } - else - { - sheet = m_state.m_textureram0; - } - } + // disable smooth wrapping if mirroring is enabled + extra.texwrapx = (tri->texheader[0] >> 6) & 1 & ~extra.texmirrorx; + extra.texwrapy = (tri->texheader[0] >> 7) & 1 & ~extra.texmirrory; + + extra.texsheet[0] = (tri->texheader[2] & 0x1000) ? m_state.m_textureram1 : m_state.m_textureram0; + extra.texsheet[1] = (tri->texheader[2] & 0x1000) ? m_state.m_textureram0 : m_state.m_textureram1; + extra.texwidth = 32 << ((tri->texheader[0] >> 0) & 0x7); + extra.texheight = 32 << ((tri->texheader[0] >> 3) & 0x7); + extra.texx = 32 * ((tri->texheader[2] >> 0) & 0x3f); + extra.texy = 32 * ((tri->texheader[2] >> 6) & 0x1f); + + // microtexture parameters + extra.utex = (tri->texheader[0] >> 12) & 1; + extra.utexminlod = (tri->texheader[0] >> 10) & 3; + extra.utexx = ((tri->texheader[2] >> 13) & 1) * 128; + extra.utexy = ((tri->texheader[2] >> 14) & 3) * 128; tri->v[0].pz = 1.0f / (tri->v[0].pz + std::numeric_limits::min()); tri->v[0].pu = tri->v[0].pu * tri->v[0].pz * (1.0f / 8.0f); @@ -1155,7 +1148,7 @@ void model2_state::model2_3d_push( raster_state *raster, u32 input ) if ( address & 0x800000 ) raster->texture_ram[address & 0xffff] = raster->command_buffer[2]; else - raster->log_ram[address & 0xffff] = raster->command_buffer[2]; + raster->log_ram[address & 0x7fff] = raster->command_buffer[2]; /* increment the address and decrease the count */ raster->command_buffer[0]++; @@ -2612,7 +2605,7 @@ void model2_state::video_start() /* init various video-related pointers */ m_palram = make_unique_clear(0x4000/2); m_colorxlat = make_unique_clear(0xc000/2); - m_lumaram = make_unique_clear(0x10000/2); + m_lumaram = make_unique_clear(0x8000); m_fbvramA = make_unique_clear(0x80000/2); m_fbvramB = make_unique_clear(0x80000/2); @@ -2631,7 +2624,7 @@ void model2_state::video_start() save_item(NAME(m_render_mode)); save_pointer(NAME(m_palram), 0x4000/2); save_pointer(NAME(m_colorxlat), 0xc000/2); - save_pointer(NAME(m_lumaram), 0x10000/2); + save_pointer(NAME(m_lumaram), 0x8000); save_pointer(NAME(m_gamma_table), 256); } @@ -2689,6 +2682,7 @@ void model2_state::tri_list_dump(FILE *dst) fprintf( dst, "texheader - 2: %04x\n", m_raster->tri_list[i].texheader[2] ); fprintf( dst, "texheader - 3: %04x\n", m_raster->tri_list[i].texheader[3] ); fprintf( dst, "luma: %02x\n", m_raster->tri_list[i].luma ); + fprintf( dst, "texlod: %08x\n", m_raster->tri_list[i].texlod ); fprintf( dst, "vp.sx: %04x\n", m_raster->tri_list[i].viewport[0] ); fprintf( dst, "vp.sy: %04x\n", m_raster->tri_list[i].viewport[1] ); fprintf( dst, "vp.ex: %04x\n", m_raster->tri_list[i].viewport[2] ); diff --git a/src/mame/sega/model2rd.ipp b/src/mame/sega/model2rd.ipp index 433c89664a713..22d3144196e71 100644 --- a/src/mame/sega/model2rd.ipp +++ b/src/mame/sega/model2rd.ipp @@ -62,84 +62,142 @@ void model2_renderer::draw_scanline_solid(int32_t scanline, const extent_t& exte p[x] = color; } +#define LERP(X, Y, A) (((X) + ((((Y) - (X)) * (A)) >> 8)) & 0x00ff00ff) + template -u32 model2_renderer::fetch_bilinear_texel(const m2_poly_extra_data& object, const u32 miplevel, const float fu, const float fv ) +u32 model2_renderer::fetch_bilinear_texel(const m2_poly_extra_data& object, const s32 miplevel, s32 u, s32 v) { - constexpr float lodfactor[6] = { 256.0F, 128.0F, 64.0F, 32.0F, 16.0F, 8.0F }; - u32 tex_mirr_x = object.texmirrorx; - u32 tex_mirr_y = object.texmirrory; - u32 tex_width = object.texwidth[miplevel]; - u32 tex_height = object.texheight[miplevel]; - u32 *sheet = object.texsheet[miplevel]; - u32 tex_x = object.texx[miplevel]; - u32 tex_y = object.texy[miplevel]; - u32 tex_x_mask = tex_width - 1; - u32 tex_y_mask = tex_height - 1; - s32 u = fu * lodfactor[miplevel]; - s32 v = fv * lodfactor[miplevel]; - u32 t, tex1, tex2, tex3, tex4, frac1, frac2, frac3, frac4; - int u2, u2n; - int v2, v2n; - - u2 = u >> 8; - v2 = v >> 8; - - if (tex_mirr_x && ((u2 & tex_width) != 0)) // Only flip if even number of tilings + u32 tex_wrap_x = object.texwrapx; + u32 tex_wrap_y = object.texwrapy; + u32 tex_mirr_x = object.texmirrorx; + u32 tex_mirr_y = object.texmirrory; + u32 tex_width, tex_height; + u32 tex_x, tex_y; + u32* sheet; + + if (miplevel == -1) { - u2 = (u2 ^ tex_x_mask) & tex_x_mask; - u2n = std::max(0, u2 - 1); // Ensure sample is inside texture + // microtexture + tex_width = 128; + tex_height = 128; + tex_x = object.utexx; + tex_y = object.utexy; + sheet = object.texsheet[1]; + u <<= 1 << object.utexminlod; + v <<= 1 << object.utexminlod; } else { - u2 &= tex_x_mask; - u2n = std::min(u2 + 1, (int)tex_x_mask); // Ensure sample is inside texture + // regular texture + tex_width = object.texwidth >> miplevel; + tex_height = object.texheight >> miplevel; + tex_x = ((object.texx - 2048) >> miplevel) & 2047; + tex_y = ((object.texy - 1024) >> miplevel) & 1023; + sheet = object.texsheet[miplevel & 1]; + u >>= miplevel; + v >>= miplevel; } - if (tex_mirr_y && ((v2 & tex_height) != 0)) // Only flip if even number of tilings + + if (tex_mirr_x && (u & (tex_width << 8))) + u = ~u; + + if (tex_mirr_y && (v & (tex_height << 8))) + v = ~v; + + // subtract 1/2 texel + u -= 0x80; + v -= 0x80; + + // extract the fractions to use as blending factors + u32 ufrac = u & 0xff; + u32 vfrac = v & 0xff; + + // get the four texel locations and confine to texture dimensions + u32 u0 = (u >> 8) & (tex_width - 1); + u32 u1 = (u0 + 1) & (tex_width - 1); + u32 v0 = (v >> 8) & (tex_height - 1); + u32 v1 = (v0 + 1) & (tex_height - 1); + + // clamp the texture coordinates if smooth wrapping is not enabled + if (!tex_wrap_x && u1 == 0) { - v2 = (v2 ^ tex_y_mask) & tex_y_mask; - v2n = std::max(0, v2 - 1); // Ensure sample is inside texture + if (ufrac >= 0x80) + u0 = u1, u1++, ufrac = 0; // left edge of texture + else + u1 = u0, u0--, ufrac = 0x100; // right edge of texture } - else + + if (!tex_wrap_y && v1 == 0) + { + if (vfrac >= 0x80) + v0 = 0, v1++, vfrac = 0; // top edge of texture + else + v1 = v0, v0--, vfrac = 0x100; // bottom edge of texture + } + + // read the four texels from the texture sheet + u32 tex00 = get_texel(tex_x, tex_y, u0, v0, sheet) << 4; + u32 tex01 = get_texel(tex_x, tex_y, u1, v0, sheet) << 4; + u32 tex10 = get_texel(tex_x, tex_y, u0, v1, sheet) << 4; + u32 tex11 = get_texel(tex_x, tex_y, u1, v1, sheet) << 4; + + if (Translucent) { - v2 &= tex_y_mask; - v2n = std::min(v2 + 1, (int)tex_y_mask); // Ensure sample is inside texture + // pack the alpha components into the upper 16 bits + if (tex00 != 0xf0) tex00 |= 0x00800000; + if (tex01 != 0xf0) tex01 |= 0x00800000; + if (tex10 != 0xf0) tex10 |= 0x00800000; + if (tex11 != 0xf0) tex11 |= 0x00800000; + + // if a texel is transparent, it takes the luma value of the neighboring texel + if (tex00 == 0x000000f0) tex00 = tex01 & 0xff; + if (tex01 == 0x000000f0) tex01 = tex00 & 0xff; + if (tex10 == 0x000000f0) tex10 = tex11 & 0xff; + if (tex11 == 0x000000f0) tex11 = tex10 & 0xff; } - frac1 = u & 0xff; - frac2 = 0x100 - frac1; - frac3 = v & 0xff; - frac4 = 0x100 - frac3; - tex1 = get_texel(tex_x, tex_y, u2, v2, sheet); - tex2 = get_texel(tex_x, tex_y, u2n, v2, sheet); - tex3 = get_texel(tex_x, tex_y, u2, v2n, sheet); - tex4 = get_texel(tex_x, tex_y, u2n, v2n, sheet); + // linearly interpolate between left and right texels + u32 tex0x = LERP(tex00, tex01, ufrac); + u32 tex1x = LERP(tex10, tex11, ufrac); + if (Translucent) { - u32 alp1 = (tex1 + 1) >> 4; - u32 alp2 = (tex2 + 1) >> 4; - u32 alp3 = (tex3 + 1) >> 4; - u32 alp4 = (tex4 + 1) >> 4; - u32 alp = alp1 * frac2 * frac4 + alp2 * frac1 * frac4 + alp3 * frac2 * frac3 + alp4 * frac1 * frac3; - if (alp >= 0x8000) - return 0xffffffff; - - // Anti Alpha Highlighted Edges - tex1 &= alp1 - 1; - tex2 &= alp2 - 1; - tex3 &= alp3 - 1; - tex4 &= alp4 - 1; - u32 maxValidTex = std::max(std::max(std::max(tex1, tex2), tex3), tex4); - if (alp1) - tex1 = maxValidTex; - if (alp2) - tex2 = maxValidTex; - if (alp3) - tex3 = maxValidTex; - if (alp4) - tex4 = maxValidTex; + if (tex0x == 0x000000f0) tex0x = tex1x & 0xff; + if (tex1x == 0x000000f0) tex1x = tex0x & 0xff; } - t = (tex1 * frac2 * frac4) + (tex2 * frac1 * frac4) + (tex3 * frac2 * frac3) + (tex4 * frac1 * frac3); - return t >> 8; + + // calculate the final bilinear filtered texel + return LERP(tex0x, tex1x, vfrac); +} + +// mostly copied from video/voodoo_render.cpp +inline s32 ATTR_FORCE_INLINE fast_log2(float value) +{ + // return 0 for negative values; should never happen + if (UNEXPECTED(value < 0.0f)) + return 0; + + // we only need the exponent and highest 7 bits of mantissa + u32 ival = f2u(value) >> 16; + + // extract exponent + s32 exp = (ival >> 7) - 127; + + // use top 7 bits of mantissa to look up fractional log2 + static u8 const s_log2_table[128] = + { + 0, 2, 5, 8, 11, 14, 16, 19, 22, 25, 27, 30, 33, 35, 38, 40, + 43, 46, 48, 51, 53, 56, 58, 61, 63, 65, 68, 70, 73, 75, 77, 80, + 82, 84, 87, 89, 91, 93, 96, 98, 100, 102, 104, 106, 109, 111, 113, 115, + 117, 119, 121, 123, 125, 127, 129, 132, 134, 136, 138, 140, 141, 143, 145, 147, + 149, 151, 153, 155, 157, 159, 161, 162, 164, 166, 168, 170, 172, 173, 175, 177, + 179, 181, 182, 184, 186, 188, 189, 191, 193, 194, 196, 198, 200, 201, 203, 205, + 206, 208, 209, 211, 213, 214, 216, 218, 219, 221, 222, 224, 225, 227, 229, 230, + 232, 233, 235, 236, 238, 239, 241, 242, 244, 245, 247, 248, 250, 251, 253, 254 + }; + + // combine and return result + return (exp << 8) | s_log2_table[ival & 127]; } // textured render path @@ -153,7 +211,7 @@ void model2_renderer::draw_scanline_tex(int32_t scanline, const extent_t &extent const u16 *colortable_r = &state->m_colorxlat[0x0000/2]; const u16 *colortable_g = &state->m_colorxlat[0x4000/2]; const u16 *colortable_b = &state->m_colorxlat[0x8000/2]; - const u16 *lumaram = &state->m_lumaram[0]; + const u8 *lumaram = &state->m_lumaram[0]; u32 colorbase = object.colorbase; u32 lumabase = object.lumabase; u8 checker = object.checker; @@ -162,14 +220,11 @@ void model2_renderer::draw_scanline_tex(int32_t scanline, const extent_t &extent float uoz = extent.param[1].start; float voz = extent.param[2].start; float dooz = extent.param[0].dpdx; - float dudxoz = extent.param[1].dpdx; - float dvdxoz = extent.param[2].dpdx; - float dudyoz = extent.param[1].dpdy; - float dvdyoz = extent.param[2].dpdy; - float norm = sqrtf( std::max(dudxoz * dudxoz + dvdxoz * dvdxoz, dudyoz * dudyoz + dvdyoz * dvdyoz) ); - int tr, tg, tb; - u32 t, t2; - u8 luma; + float duoz = extent.param[1].dpdx; + float dvoz = extent.param[2].dpdx; + + // calculate maximum mipmap level from texture dimensions; we go down to 2x2 + s32 max_level = (f2u((float)std::min(object.texwidth, object.texheight)) >> 23) - 128; colorbase = state->m_palram[(colorbase + 0x1000)] & 0x7fff; @@ -183,44 +238,60 @@ void model2_renderer::draw_scanline_tex(int32_t scanline, const extent_t &extent { // if the first pixel is transparent, skip to the next one if (!((x ^ scanline) & 1)) - x++, ooz += dooz, uoz += dudxoz, voz += dvdxoz; + x++, ooz += dooz, uoz += duoz, voz += dvoz; // increment by 2 pixels each time, skipping every other pixel - dx = 2, dooz *= 2.0f, dudxoz *= 2.0f, dvdxoz *= 2.0f; + dx = 2, dooz *= 2.0f, duoz *= 2.0f, dvoz *= 2.0f; } - for (; x < extent.stopx; x += dx, uoz += dudxoz, voz += dvdxoz, ooz += dooz) + for (; x < extent.stopx; x += dx, ooz += dooz, uoz += duoz, voz += dvoz) { float z = recip_approx(ooz); - float mml = log2f(norm * z) - 2.0F; // No parts are squared so no need for the usual 0.5 factor - u32 level = std::min(std::max(0, (int)mml), 4); // We need room for one more level for trilinear - float fu = uoz * z; - float fv = voz * z; - t = fetch_bilinear_texel(object, level, fu, fv); - if (t == 0xffffffff) - continue; + s32 mml = -object.texlod + fast_log2(z); // equivalent to log2(z^2) + s32 level = std::clamp(mml >> 7, 0, max_level); - t2 = fetch_bilinear_texel(object, level + 1, fu, fv); - if (t2 != 0xffffffff) + // we give texture coordinates 8 fractional bits + s32 u = (s32)(uoz * z * 256.0f); + s32 v = (s32)(voz * z * 256.0f); + + u32 t = fetch_bilinear_texel(object, level, u, v); + + if (mml > 0 && level < max_level) + { + u32 t2 = fetch_bilinear_texel(object, level + 1, u, v); + s32 frac = (mml & 127) << 1; + t = LERP(t, t2, frac); + } + else if (object.utex && mml < 0) { - // Trilinear combination - int frac = int((mml - level) * 256.0F); - frac = std::min(std::max(frac, 0), 256); - t = ((256 - frac) * t + frac * t2) >> 8; + // microtexture; blend up to 50% + u32 t2 = fetch_bilinear_texel(object, -1, u, v); + s32 frac = std::min(-mml >> object.utexminlod, 128); + t = LERP(t, t2, frac); + } + + if (Translucent) + { + // if alpha is less than 50%, discard + if (t < 0x00400000) + continue; + + // remove the alpha value; no longer needed + t &= 0xff; } - // Trilinear combination has 8 bits of precision, and the table needs t to be shifted by 3 on the left - luma = (u32)lumaram[lumabase + (t >> (8 - 3))] * object.luma / 256; + // filtered texel has 8 bits of precision but translator map has 128 (7-bit) entries; need to shift right by 1 + u8 luma = (u32)lumaram[lumabase + (t >> 1)] * object.luma / 256; // Virtua Striker sets up a luma of 0x40 for national flags on bleachers, fix here. luma = std::min(int(luma), 0x3f); /* we have the 6 bits of luma information along with 5 bits per color component */ /* now build and index into the master color lookup table and extract the raw RGB values */ - tr = colortable_r[(luma)] & 0xff; - tg = colortable_g[(luma)] & 0xff; - tb = colortable_b[(luma)] & 0xff; + u32 tr = colortable_r[(luma)] & 0xff; + u32 tg = colortable_g[(luma)] & 0xff; + u32 tb = colortable_b[(luma)] & 0xff; tr = gamma_value[tr]; tg = gamma_value[tg]; tb = gamma_value[tb];