From eec34c1efb8b2baec4a5693eb8ab3aa8ebd9ef66 Mon Sep 17 00:00:00 2001 From: Orange Date: Thu, 30 Oct 2025 05:38:11 +0300 Subject: [PATCH] removed brackets Initial plan Initial exploration and analysis complete Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com> Optimize performance: A* pathfinding, Vector3 hash, pattern scanner, AVX2 code, and serialization Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com> Add bounds check for navigation mesh serialization Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com> Document serialization limitation for large neighbor counts Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com> Add _codeql_build_dir to gitignore Co-authored-by: orange-cpp <59374393+orange-cpp@users.noreply.github.com> Removes codeql detected source root Eliminates the automatically generated file that was causing issues. This file was added by codeql and no longer needed. revert cleaned up gitignore moved to anon namespace Improves navigation mesh serialization and clamping Ensures correct serialization of navigation meshes by clamping neighbor counts to prevent data corruption when exceeding uint16_t limits. Updates data types to `std::uint8_t` and `std::size_t` for consistency. Uses `std::copy_n` instead of `std::memcpy` for deserialization. --- .clang-format | 2 +- include/omath/pathfinding/a_star.hpp | 4 - source/pathfinding/a_star.cpp | 58 +++++---- source/pathfinding/navigation_mesh.cpp | 53 +++++--- .../proj_pred_engine_avx2.cpp | 119 +++++++++--------- 5 files changed, 135 insertions(+), 101 deletions(-) diff --git a/.clang-format b/.clang-format index 0cd7d628..0d5d0d47 100644 --- a/.clang-format +++ b/.clang-format @@ -56,7 +56,7 @@ SpaceBeforeParensOptions: AfterIfMacros: true AfterOverloadedOperator: false BeforeNonEmptyParentheses: false -SpaceBeforeRangeBasedForLoopColon: false +SpaceBeforeRangeBasedForLoopColon: true SpaceInEmptyParentheses: false SpacesInCStyleCastParentheses: false SpacesInConditionalStatement: false diff --git a/include/omath/pathfinding/a_star.hpp b/include/omath/pathfinding/a_star.hpp index 218a059c..c785afd4 100644 --- a/include/omath/pathfinding/a_star.hpp +++ b/include/omath/pathfinding/a_star.hpp @@ -22,9 +22,5 @@ namespace omath::pathfinding static std::vector> reconstruct_final_path(const std::unordered_map, PathNode>& closed_list, const Vector3& current) noexcept; - - [[nodiscard]] - static auto get_perfect_node(const std::unordered_map, PathNode>& open_list, - const Vector3& end_vertex) noexcept; }; } // namespace omath::pathfinding diff --git a/source/pathfinding/a_star.cpp b/source/pathfinding/a_star.cpp index bcd73484..a2bff259 100644 --- a/source/pathfinding/a_star.cpp +++ b/source/pathfinding/a_star.cpp @@ -4,9 +4,25 @@ #include "omath/pathfinding/a_star.hpp" #include #include +#include #include #include +namespace +{ + struct OpenListNode final + { + omath::Vector3 position; + float f_cost; + + [[nodiscard]] + bool operator>(const OpenListNode& other) const noexcept + { + return f_cost > other.f_cost; + } + }; +} + namespace omath::pathfinding { struct PathNode final @@ -37,23 +53,13 @@ namespace omath::pathfinding std::ranges::reverse(path); return path; } - auto Astar::get_perfect_node(const std::unordered_map, PathNode>& open_list, - const Vector3& end_vertex) noexcept - { - return std::ranges::min_element(open_list, - [&end_vertex](const auto& a, const auto& b) - { - const float fa = a.second.g_cost + a.first.distance_to(end_vertex); - const float fb = b.second.g_cost + b.first.distance_to(end_vertex); - return fa < fb; - }); - } std::vector> Astar::find_path(const Vector3& start, const Vector3& end, const NavigationMesh& nav_mesh) noexcept { std::unordered_map, PathNode> closed_list; - std::unordered_map, PathNode> open_list; + std::unordered_map, PathNode> node_data; + std::priority_queue, std::greater<>> open_list; auto maybe_start_vertex = nav_mesh.get_closest_vertex(start); auto maybe_end_vertex = nav_mesh.get_closest_vertex(end); @@ -64,20 +70,27 @@ namespace omath::pathfinding const auto start_vertex = maybe_start_vertex.value(); const auto end_vertex = maybe_end_vertex.value(); - open_list.emplace(start_vertex, PathNode{std::nullopt, 0.f}); + node_data.emplace(start_vertex, PathNode{std::nullopt, 0.f}); + open_list.push({start_vertex, start_vertex.distance_to(end_vertex)}); while (!open_list.empty()) { - auto current_it = get_perfect_node(open_list, end_vertex); + auto current = open_list.top().position; + open_list.pop(); + + if (closed_list.contains(current)) + continue; + + auto current_node_it = node_data.find(current); + if (current_node_it == node_data.end()) + continue; - const auto current = current_it->first; - const auto current_node = current_it->second; + const auto current_node = current_node_it->second; if (current == end_vertex) return reconstruct_final_path(closed_list, current); closed_list.emplace(current, current_node); - open_list.erase(current_it); for (const auto& neighbor: nav_mesh.get_neighbors(current)) { @@ -86,11 +99,14 @@ namespace omath::pathfinding const float tentative_g_cost = current_node.g_cost + neighbor.distance_to(current); - // ReSharper disable once CppTooWideScopeInitStatement - const auto open_it = open_list.find(neighbor); + auto node_it = node_data.find(neighbor); - if (open_it == open_list.end() || tentative_g_cost < open_it->second.g_cost) - open_list[neighbor] = PathNode{current, tentative_g_cost}; + if (node_it == node_data.end() || tentative_g_cost < node_it->second.g_cost) + { + node_data[neighbor] = PathNode{current, tentative_g_cost}; + const float f_cost = tentative_g_cost + neighbor.distance_to(end_vertex); + open_list.push({neighbor, f_cost}); + } } } diff --git a/source/pathfinding/navigation_mesh.cpp b/source/pathfinding/navigation_mesh.cpp index 017fbe26..370bc8fd 100644 --- a/source/pathfinding/navigation_mesh.cpp +++ b/source/pathfinding/navigation_mesh.cpp @@ -3,6 +3,8 @@ // #include "omath/pathfinding/navigation_mesh.hpp" #include +#include +#include #include namespace omath::pathfinding { @@ -30,55 +32,68 @@ namespace omath::pathfinding std::vector NavigationMesh::serialize() const noexcept { - auto dump_to_vector = [](const T& t, std::vector& vec) - { - for (size_t i = 0; i < sizeof(t); i++) - vec.push_back(*(reinterpret_cast(&t) + i)); - }; - - std::vector raw; + std::vector raw; - for (const auto& [vertex, neighbors]: m_vertex_map) + // Pre-calculate total size for better performance + std::size_t total_size = 0; + for (const auto& [vertex, neighbors] : m_vertex_map) { - const auto neighbors_count = neighbors.size(); + total_size += sizeof(vertex) + sizeof(std::uint16_t) + sizeof(Vector3) * neighbors.size(); + } + raw.reserve(total_size); - dump_to_vector(vertex, raw); - dump_to_vector(neighbors_count, raw); + auto dump_to_vector = [&raw](const T& t) + { + const auto* byte_ptr = reinterpret_cast(&t); + raw.insert(raw.end(), byte_ptr, byte_ptr + sizeof(T)); + }; - for (const auto& neighbor: neighbors) - dump_to_vector(neighbor, raw); + for (const auto& [vertex, neighbors] : m_vertex_map) + { + // Clamp neighbors count to fit in uint16_t (prevents silent data corruption) + // NOTE: If neighbors.size() > 65535, only the first 65535 neighbors will be serialized. + // This is a limitation of the current serialization format using uint16_t for count. + const auto clamped_count = + std::min(neighbors.size(), std::numeric_limits::max()); + const auto neighbors_count = static_cast(clamped_count); + + dump_to_vector(vertex); + dump_to_vector(neighbors_count); + + // Only serialize up to the clamped count + for (std::size_t i = 0; i < clamped_count; ++i) + dump_to_vector(neighbors[i]); } return raw; } void NavigationMesh::deserialize(const std::vector& raw) noexcept { - auto load_from_vector = [](const std::vector& vec, size_t& offset, auto& value) + auto load_from_vector = [](const std::vector& vec, std::size_t& offset, auto& value) { if (offset + sizeof(value) > vec.size()) - { throw std::runtime_error("Deserialize: Invalid input data size."); - } + std::copy_n(vec.data() + offset, sizeof(value), reinterpret_cast(&value)); offset += sizeof(value); }; m_vertex_map.clear(); - size_t offset = 0; + std::size_t offset = 0; while (offset < raw.size()) { Vector3 vertex; load_from_vector(raw, offset, vertex); - uint16_t neighbors_count; + std::uint16_t neighbors_count; load_from_vector(raw, offset, neighbors_count); std::vector> neighbors; neighbors.reserve(neighbors_count); - for (size_t i = 0; i < neighbors_count; ++i) + for (std::size_t i = 0; i < neighbors_count; ++i) { Vector3 neighbor; load_from_vector(raw, offset, neighbor); diff --git a/source/projectile_prediction/proj_pred_engine_avx2.cpp b/source/projectile_prediction/proj_pred_engine_avx2.cpp index 9a26c435..9605db6d 100644 --- a/source/projectile_prediction/proj_pred_engine_avx2.cpp +++ b/source/projectile_prediction/proj_pred_engine_avx2.cpp @@ -18,97 +18,104 @@ namespace omath::projectile_prediction [[maybe_unused]] const Target& target) const { #if defined(OMATH_USE_AVX2) && defined(__i386__) && defined(__x86_64__) - const float bulletGravity = m_gravityConstant * projectile.m_gravityScale; - const float v0 = projectile.m_launchSpeed; - const float v0Sqr = v0 * v0; - const Vector3 projOrigin = projectile.m_origin; + const float bullet_gravity = m_gravity_constant * projectile.m_gravity_scale; + const float v0 = projectile.m_launch_speed; + const float v0_sqr = v0 * v0; + const Vector3 proj_origin = projectile.m_origin; constexpr int SIMD_FACTOR = 8; - float currentTime = m_simulationTimeStep; + float current_time = m_simulation_time_step; - for (; currentTime <= m_maximumSimulationTime; currentTime += m_simulationTimeStep * SIMD_FACTOR) + for (; current_time <= m_maximum_simulation_time; current_time += m_simulation_time_step * SIMD_FACTOR) { const __m256 times - = _mm256_setr_ps(currentTime, currentTime + m_simulationTimeStep, - currentTime + m_simulationTimeStep * 2, currentTime + m_simulationTimeStep * 3, - currentTime + m_simulationTimeStep * 4, currentTime + m_simulationTimeStep * 5, - currentTime + m_simulationTimeStep * 6, currentTime + m_simulationTimeStep * 7); + = _mm256_setr_ps(current_time, current_time + m_simulation_time_step, + current_time + m_simulation_time_step * 2, current_time + m_simulation_time_step * 3, + current_time + m_simulation_time_step * 4, current_time + m_simulation_time_step * 5, + current_time + m_simulation_time_step * 6, current_time + m_simulation_time_step * 7); - const __m256 targetX + const __m256 target_x = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.x), times, _mm256_set1_ps(target.m_origin.x)); - const __m256 targetY + const __m256 target_y = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.y), times, _mm256_set1_ps(target.m_origin.y)); - const __m256 timesSq = _mm256_mul_ps(times, times); - const __m256 targetZ = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.z), times, - _mm256_fnmadd_ps(_mm256_set1_ps(0.5f * m_gravityConstant), timesSq, + const __m256 times_sq = _mm256_mul_ps(times, times); + const __m256 target_z = _mm256_fmadd_ps(_mm256_set1_ps(target.m_velocity.z), times, + _mm256_fnmadd_ps(_mm256_set1_ps(0.5f * m_gravity_constant), times_sq, _mm256_set1_ps(target.m_origin.z))); - const __m256 deltaX = _mm256_sub_ps(targetX, _mm256_set1_ps(projOrigin.x)); - const __m256 deltaY = _mm256_sub_ps(targetY, _mm256_set1_ps(projOrigin.y)); - const __m256 deltaZ = _mm256_sub_ps(targetZ, _mm256_set1_ps(projOrigin.z)); + const __m256 delta_x = _mm256_sub_ps(target_x, _mm256_set1_ps(proj_origin.x)); + const __m256 delta_y = _mm256_sub_ps(target_y, _mm256_set1_ps(proj_origin.y)); + const __m256 delta_z = _mm256_sub_ps(target_z, _mm256_set1_ps(proj_origin.z)); - const __m256 dSqr = _mm256_add_ps(_mm256_mul_ps(deltaX, deltaX), _mm256_mul_ps(deltaY, deltaY)); + const __m256 d_sqr = _mm256_add_ps(_mm256_mul_ps(delta_x, delta_x), _mm256_mul_ps(delta_y, delta_y)); - const __m256 bgTimesSq = _mm256_mul_ps(_mm256_set1_ps(bulletGravity), timesSq); - const __m256 term = _mm256_add_ps(deltaZ, _mm256_mul_ps(_mm256_set1_ps(0.5f), bgTimesSq)); - const __m256 termSq = _mm256_mul_ps(term, term); - const __m256 numerator = _mm256_add_ps(dSqr, termSq); - const __m256 denominator = _mm256_add_ps(timesSq, _mm256_set1_ps(1e-8f)); // Avoid division by zero - const __m256 requiredV0Sqr = _mm256_div_ps(numerator, denominator); + const __m256 bg_times_sq = _mm256_mul_ps(_mm256_set1_ps(bullet_gravity), times_sq); + const __m256 term = _mm256_add_ps(delta_z, _mm256_mul_ps(_mm256_set1_ps(0.5f), bg_times_sq)); + const __m256 term_sq = _mm256_mul_ps(term, term); + const __m256 numerator = _mm256_add_ps(d_sqr, term_sq); + const __m256 denominator = _mm256_add_ps(times_sq, _mm256_set1_ps(1e-8f)); // Avoid division by zero + const __m256 required_v0_sqr = _mm256_div_ps(numerator, denominator); - const __m256 v0SqrVec = _mm256_set1_ps(v0Sqr + 1e-3f); - const __m256 mask = _mm256_cmp_ps(requiredV0Sqr, v0SqrVec, _CMP_LE_OQ); + const __m256 v0_sqr_vec = _mm256_set1_ps(v0_sqr + 1e-3f); + const __m256 mask = _mm256_cmp_ps(required_v0_sqr, v0_sqr_vec, _CMP_LE_OQ); - const unsigned validMask = _mm256_movemask_ps(mask); + const unsigned valid_mask = _mm256_movemask_ps(mask); - if (!validMask) + if (!valid_mask) continue; - alignas(32) float validTimes[SIMD_FACTOR]; - _mm256_store_ps(validTimes, times); + alignas(32) float valid_times[SIMD_FACTOR]; + _mm256_store_ps(valid_times, times); for (int i = 0; i < SIMD_FACTOR; ++i) { - if (!(validMask & (1 << i))) + if (!(valid_mask & (1 << i))) continue; - const float candidateTime = validTimes[i]; + const float candidate_time = valid_times[i]; - if (candidateTime > m_maximumSimulationTime) + if (candidate_time > m_maximum_simulation_time) continue; // Fine search around candidate time - for (float fineTime = candidateTime - m_simulationTimeStep * 2; - fineTime <= candidateTime + m_simulationTimeStep * 2; fineTime += m_simulationTimeStep) + for (float fine_time = candidate_time - m_simulation_time_step * 2; + fine_time <= candidate_time + m_simulation_time_step * 2; fine_time += m_simulation_time_step) { - if (fineTime < 0) + if (fine_time < 0) continue; - const Vector3 targetPos = target.PredictPosition(fineTime, m_gravityConstant); - const auto pitch = CalculatePitch(projOrigin, targetPos, bulletGravity, v0, fineTime); + // Manually compute predicted target position to avoid adding method to Target + Vector3 target_pos = target.m_origin + target.m_velocity * fine_time; + if (target.m_is_airborne) + target_pos.z -= 0.5f * m_gravity_constant * fine_time * fine_time; + + const auto pitch = calculate_pitch(proj_origin, target_pos, bullet_gravity, v0, fine_time); if (!pitch) continue; - const Vector3 delta = targetPos - projOrigin; + const Vector3 delta = target_pos - proj_origin; const float d = std::sqrt(delta.x * delta.x + delta.y * delta.y); - const float height = d * std::tan(angles::DegreesToRadians(*pitch)); - return Vector3(targetPos.x, targetPos.y, projOrigin.z + height); + const float height = d * std::tan(angles::degrees_to_radians(*pitch)); + return Vector3(target_pos.x, target_pos.y, proj_origin.z + height); } } } // Fallback scalar processing for remaining times - for (; currentTime <= m_maximumSimulationTime; currentTime += m_simulationTimeStep) + for (; current_time <= m_maximum_simulation_time; current_time += m_simulation_time_step) { - const Vector3 targetPos = target.PredictPosition(currentTime, m_gravityConstant); - const auto pitch = CalculatePitch(projOrigin, targetPos, bulletGravity, v0, currentTime); + Vector3 target_pos = target.m_origin + target.m_velocity * current_time; + if (target.m_is_airborne) + target_pos.z -= 0.5f * m_gravity_constant * current_time * current_time; + + const auto pitch = calculate_pitch(proj_origin, target_pos, bullet_gravity, v0, current_time); if (!pitch) continue; - const Vector3 delta = targetPos - projOrigin; + const Vector3 delta = target_pos - proj_origin; const float d = std::sqrt(delta.x * delta.x + delta.y * delta.y); - const float height = d * std::tan(angles::DegreesToRadians(*pitch)); - return Vector3(targetPos.x, targetPos.y, projOrigin.z + height); + const float height = d * std::tan(angles::degrees_to_radians(*pitch)); + return Vector3(target_pos.x, target_pos.y, proj_origin.z + height); } return std::nullopt; @@ -134,22 +141,22 @@ namespace omath::projectile_prediction return std::nullopt; const Vector3 delta = target_pos - proj_origin; - const float dSqr = delta.x * delta.x + delta.y * delta.y; + const float d_sqr = delta.x * delta.x + delta.y * delta.y; const float h = delta.z; const float term = h + 0.5f * bullet_gravity * time * time; - const float requiredV0Sqr = (dSqr + term * term) / (time * time); - const float v0Sqr = v0 * v0; + const float required_v0_sqr = (d_sqr + term * term) / (time * time); + const float v0_sqr = v0 * v0; - if (requiredV0Sqr > v0Sqr + 1e-3f) + if (required_v0_sqr > v0_sqr + 1e-3f) return std::nullopt; - if (dSqr == 0.0f) + if (d_sqr == 0.0f) return term >= 0.0f ? 90.0f : -90.0f; - const float d = std::sqrt(dSqr); - const float tanTheta = term / d; - return angles::RadiansToDegrees(std::atan(tanTheta)); + const float d = std::sqrt(d_sqr); + const float tan_theta = term / d; + return angles::radians_to_degrees(std::atan(tan_theta)); #else throw std::runtime_error( std::format("{} AVX2 feature is not enabled!", std::source_location::current().function_name()));