From 97c5055345af1e18cc37b396af3704034535b55a Mon Sep 17 00:00:00 2001 From: Ruochun Date: Wed, 14 Jan 2026 22:46:45 +0800 Subject: [PATCH 01/17] Support CUDA13 --- cmake/CudaSupportedArchitectures.cmake | 7 ++++++- src/algorithms/DEMCubWrappers.cu | 28 +++++++++++++------------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/cmake/CudaSupportedArchitectures.cmake b/cmake/CudaSupportedArchitectures.cmake index b3853ed0..4d163870 100644 --- a/cmake/CudaSupportedArchitectures.cmake +++ b/cmake/CudaSupportedArchitectures.cmake @@ -31,7 +31,8 @@ function(cuda_supported_architectures) set(cu10 30 35 50 52 60 61 70 72 75) set(cu11 35 50 52 60 61 70 72 75 80) set(cu11_x 35 50 52 60 61 70 72 75 80 86) - set(cu12_x 50 52 60 61 70 72 75 80 86) + set(cu12_x 50 52 60 61 70 72 75 80 86 89 120) + set(cu13_x 75 80 86 89 90 100 120 121) if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 7) set(CUDASUP_ARCHITECTURES ${cu7} CACHE INTERNAL "") @@ -60,6 +61,10 @@ function(cuda_supported_architectures) if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12) set(CUDASUP_ARCHITECTURES ${cu12_x} CACHE INTERNAL "") endif() + + if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13) + set(CUDASUP_ARCHITECTURES ${cu13_x} CACHE INTERNAL "") + endif() if (NOT DEFINED CUDASUP_ARCHITECTURES) message(SEND_ERROR "[CUDASUP] Could not determine device architectures supported by the CUDA toolkit!") diff --git a/src/algorithms/DEMCubWrappers.cu b/src/algorithms/DEMCubWrappers.cu index 4efade16..4ecfcbe5 100644 --- a/src/algorithms/DEMCubWrappers.cu +++ b/src/algorithms/DEMCubWrappers.cu @@ -8,6 +8,14 @@ #include #include +#if CUDART_VERSION >= 13000 + #define DEME_CUB_SUM_OP(T) \ + cuda::std::plus {} +#else + #define DEME_CUB_SUM_OP(T) \ + cub::Sum {} +#endif + namespace deme { // Functor type for selecting values less than some criteria @@ -75,10 +83,11 @@ inline void cubDEMPrefixScan(T1* d_in, // let you know when it happens. 
I made a trick: use ExclusiveScan and (T2)0 as the initial value, and this forces // cub to store results as T2 type. size_t cub_scratch_bytes = 0; - cub::DeviceScan::ExclusiveScan(NULL, cub_scratch_bytes, d_in, d_out, cub::Sum(), (T2)0, n, this_stream); + cub::DeviceScan::ExclusiveScan(NULL, cub_scratch_bytes, d_in, d_out, DEME_CUB_SUM_OP(T2), (T2)0, n, this_stream); DEME_GPU_CALL(cudaStreamSynchronize(this_stream)); void* d_scratch_space = (void*)scratchPad.allocateScratchSpace(cub_scratch_bytes); - cub::DeviceScan::ExclusiveScan(d_scratch_space, cub_scratch_bytes, d_in, d_out, cub::Sum(), (T2)0, n, this_stream); + cub::DeviceScan::ExclusiveScan(d_scratch_space, cub_scratch_bytes, d_in, d_out, DEME_CUB_SUM_OP(T2), (T2)0, n, + this_stream); DEME_GPU_CALL(cudaStreamSynchronize(this_stream)); } @@ -156,23 +165,14 @@ inline void cubDEMReduceByKeys(T1* d_keys_in, template void cubDEMSum(T1* d_in, T2* d_out, size_t n, cudaStream_t& this_stream, DEMSolverScratchData& scratchPad) { size_t cub_scratch_bytes = 0; - cub::DeviceReduce::Reduce(NULL, cub_scratch_bytes, d_in, d_out, n, cub::Sum(), (T2)0, this_stream); + cub::DeviceReduce::Reduce(NULL, cub_scratch_bytes, d_in, d_out, n, DEME_CUB_SUM_OP(T2), (T2)0, this_stream); DEME_GPU_CALL(cudaStreamSynchronize(this_stream)); void* d_scratch_space = (void*)scratchPad.allocateScratchSpace(cub_scratch_bytes); - cub::DeviceReduce::Reduce(d_scratch_space, cub_scratch_bytes, d_in, d_out, n, cub::Sum(), (T2)0, this_stream); + cub::DeviceReduce::Reduce(d_scratch_space, cub_scratch_bytes, d_in, d_out, n, DEME_CUB_SUM_OP(T2), (T2)0, + this_stream); DEME_GPU_CALL(cudaStreamSynchronize(this_stream)); } -// template -// void cubDEMSum(T1* d_in, T1* d_out, size_t n, cudaStream_t& this_stream, DEMSolverScratchData& scratchPad) { -// size_t cub_scratch_bytes = 0; -// cub::DeviceReduce::Sum(NULL, cub_scratch_bytes, d_in, d_out, n, this_stream); -// DEME_GPU_CALL(cudaStreamSynchronize(this_stream)); -// void* d_scratch_space = 
(void*)scratchPad.allocateScratchSpace(cub_scratch_bytes); -// cub::DeviceReduce::Sum(d_scratch_space, cub_scratch_bytes, d_in, d_out, n, this_stream); -// DEME_GPU_CALL(cudaStreamSynchronize(this_stream)); -// } - template void cubDEMMax(T1* d_in, T1* d_out, size_t n, cudaStream_t& this_stream, DEMSolverScratchData& scratchPad) { size_t cub_scratch_bytes = 0; From 16a31288c35e74cbd0fda1246d461216a7fc6e99 Mon Sep 17 00:00:00 2001 From: Ruochun Zhang Date: Fri, 16 Jan 2026 16:23:29 +0800 Subject: [PATCH 02/17] Fix CUDA 12.8-related issue --- cmake/CudaSupportedArchitectures.cmake | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cmake/CudaSupportedArchitectures.cmake b/cmake/CudaSupportedArchitectures.cmake index 4d163870..54cf7f0c 100644 --- a/cmake/CudaSupportedArchitectures.cmake +++ b/cmake/CudaSupportedArchitectures.cmake @@ -20,7 +20,6 @@ # version of the CUDA Toolkit # # Minimum CUDA version: 7.0 -# Maximum CUDA version: 11.6 function(cuda_supported_architectures) @@ -31,7 +30,8 @@ function(cuda_supported_architectures) set(cu10 30 35 50 52 60 61 70 72 75) set(cu11 35 50 52 60 61 70 72 75 80) set(cu11_x 35 50 52 60 61 70 72 75 80 86) - set(cu12_x 50 52 60 61 70 72 75 80 86 89 120) + set(cu12_x 50 52 60 61 70 72 75 80 86 89) + set(cu12_8 50 52 60 61 70 72 75 80 86 89 120) set(cu13_x 75 80 86 89 90 100 120 121) if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 7) @@ -61,6 +61,10 @@ function(cuda_supported_architectures) if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12) set(CUDASUP_ARCHITECTURES ${cu12_x} CACHE INTERNAL "") endif() + + if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) + set(CUDASUP_ARCHITECTURES ${cu12_8} CACHE INTERNAL "") + endif() if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13) set(CUDASUP_ARCHITECTURES ${cu13_x} CACHE INTERNAL "") From 958bfa9f451da0509fb338cb778c4964236dcce9 Mon Sep 17 00:00:00 2001 From: Ruochun Zhang Date: Fri, 16 Jan 2026 19:17:32 +0800 Subject: [PATCH 03/17] Add a 
demo and slightly improve CD --- src/DEM/dT.cpp | 37 ++--- src/algorithms/DEMDynamicMisc.cu | 31 ++-- src/algorithms/DEMStaticDeviceSubroutines.h | 1 - src/demo/CMakeLists.txt | 1 + src/demo/DEMdemo_DrumCubes.cpp | 158 ++++++++++++++++++++ src/kernel/DEMKinematicMisc.cu | 12 +- 6 files changed, 199 insertions(+), 41 deletions(-) create mode 100644 src/demo/DEMdemo_DrumCubes.cpp diff --git a/src/DEM/dT.cpp b/src/DEM/dT.cpp index 89f71619..f8c16464 100644 --- a/src/DEM/dT.cpp +++ b/src/DEM/dT.cpp @@ -2337,8 +2337,6 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( "uniqueKeys", countPrimitive * sizeof(contactPairs_t)); float3* votedWeightedNormals = (float3*)solverScratchSpace.allocateTempVector( "votedWeightedNormals", countPrimitive * sizeof(float3)); - double* totalAreas = - (double*)solverScratchSpace.allocateTempVector("totalAreas", countPrimitive * sizeof(double)); solverScratchSpace.allocateDualStruct("numUniqueKeys"); size_t* numUniqueKeys = solverScratchSpace.getDualStructDevice("numUniqueKeys"); @@ -2367,22 +2365,14 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( numUniqueKeysHost, countPatch, contact_type); } - // Step 3: Reduce-by-key for areas (sum) - // Note: CUB's ReduceByKey requires an output array for unique keys, and the keys - // are the same as in Step 2. - cubSumReduceByKey(keys, uniqueKeys, areas, totalAreas, numUniqueKeys, - countPrimitive, streamInfo.stream, solverScratchSpace); - - // Step 4: Normalize the voted normals by total area and scatter back to a temp array. + // Step 3: Normalize the voted normals by total area and scatter back to a temp array. 
float3* votedNormals = (float3*)solverScratchSpace.allocateTempVector("votedNormals", countPatch * sizeof(float3)); - normalizeAndScatterVotedNormals(votedWeightedNormals, totalAreas, votedNormals, countPatch, - streamInfo.stream); + normalizeAndScatterVotedNormals(votedWeightedNormals, votedNormals, countPatch, streamInfo.stream); solverScratchSpace.finishUsingTempVector("votedWeightedNormals"); - solverScratchSpace.finishUsingTempVector("totalAreas"); // displayDeviceFloat3(votedNormals, countPatch); - // Step 5: Compute projected penetration and area for each primitive contact + // Step 4: Compute projected penetration and area for each primitive contact // Both the penetration and area are projected onto the voted normal // If the projected penetration becomes negative, both are set to 0 // Reuse keys array for the reduce-by-key operation @@ -2395,14 +2385,14 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( streamInfo.stream); solverScratchSpace.finishUsingTempVector("areas"); - // Step 6: Reduce-by-key to get total projected area per patch pair (sum) + // Step 5: Reduce-by-key to get total projected area per patch pair (sum) double* totalProjectedAreas = (double*)solverScratchSpace.allocateTempVector("totalProjectedAreas", countPatch * sizeof(double)); cubSumReduceByKey(keys, uniqueKeys, projectedAreas, totalProjectedAreas, numUniqueKeys, countPrimitive, streamInfo.stream, solverScratchSpace); - // Step 7: Reduce-by-key to get max projected penetration per patch pair (max). + // Step 6: Reduce-by-key to get max projected penetration per patch pair (max). // This result, maxProjectedPenetrations, is the max of projected penetration, aka the max pen in the // physical overlap case, and it's not the same as maxPenetrations in step 9 which is a fallback // primitive-derived penetration. 
@@ -2412,7 +2402,7 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( maxProjectedPenetrations, numUniqueKeys, countPrimitive, streamInfo.stream, solverScratchSpace); - // Step 8: Compute weighted contact points for each primitive (normal case) + // Step 7: Compute weighted contact points for each primitive (normal case) // The weight is: projected_penetration * projected_area // Reuse keys, uniqueKeys, and numUniqueKeys that are still allocated double3* weightedContactPoints = (double3*)solverScratchSpace.allocateTempVector( @@ -2444,16 +2434,16 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( solverScratchSpace.finishUsingTempVector("totalWeightedContactPoints"); solverScratchSpace.finishUsingTempVector("totalContactWeights"); - // Step 9: Handle zero-area patches (all primitive areas are 0) + // Step 8: Handle zero-area patches (all primitive areas are 0) // For these patches, we need to find the max penetration primitive and use its normal/penetration - // 9a: Extract primitive penetrations for max-reduce + // 8a: Extract primitive penetrations for max-reduce double* primitivePenetrations = (double*)solverScratchSpace.allocateTempVector( "primitivePenetrations", countPrimitive * sizeof(double)); extractPrimitivePenetrations(&granData, primitivePenetrations, startOffsetPrimitive, countPrimitive, streamInfo.stream); - // 9b: Max-negative-reduce-by-key to get max negative penetration per patch + // 8b: Max-negative-reduce-by-key to get max negative penetration per patch // This finds the largest negative value (smallest absolute value among negatives) // Positive values are treated as very negative to indicate invalid/non-physical state double* maxPenetrations = @@ -2463,7 +2453,7 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( streamInfo.stream, solverScratchSpace); solverScratchSpace.finishUsingTempVector("primitivePenetrations"); - // 9c: Find max-penetration primitives for zero-area patches and extract 
their normals, penetrations, + // 8c: Find max-penetration primitives for zero-area patches and extract their normals, penetrations, // and contact points float3* zeroAreaNormals = (float3*)solverScratchSpace.allocateTempVector("zeroAreaNormals", countPatch * sizeof(float3)); @@ -2476,7 +2466,7 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( startOffsetPrimitive, startOffsetPatch, countPrimitive, streamInfo.stream); solverScratchSpace.finishUsingTempVector("maxPenetrations"); - // Step 9d: Check if each patch has any SAT-satisfying primitive (for tri-tri contacts) + // Step 8d: Check if each patch has any SAT-satisfying primitive (for tri-tri contacts) // If no primitive satisfies SAT, the patch contact is non-physical and should use Step 9 fallback notStupidBool_t* patchHasSAT = nullptr; if (contact_type == TRIANGLE_TRIANGLE_CONTACT) { @@ -2492,7 +2482,7 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( solverScratchSpace.finishUsingTempVector("uniqueKeys"); solverScratchSpace.finishUsingDualStruct("numUniqueKeys"); - // Step 10: Finalize patch results by combining voting with zero-area handling. + // Step 9: Finalize patch results by combining voting with zero-area handling. // If patch-based projected area is 0 (or this patch pair consists of no SAT pair), meaning no physical // contact, we use the fallback estimations (zeroArea*) of CP, penetration and areas. 
double* finalAreas = @@ -2557,6 +2547,9 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( cubMaxReduce(finalPenetrations.data(), &maxTriTriPenetration, countPatch, streamInfo.stream, solverScratchSpace); // No toHost() here - keep on device since host never needs it + // maxTriTriPenetration.toHost(); + // std::cout << "Max tri-tri penetration after patch-based correction: " << *maxTriTriPenetration + // << std::endl; } // Final clean up diff --git a/src/algorithms/DEMDynamicMisc.cu b/src/algorithms/DEMDynamicMisc.cu index f584b1b2..8179fe47 100644 --- a/src/algorithms/DEMDynamicMisc.cu +++ b/src/algorithms/DEMDynamicMisc.cu @@ -138,9 +138,15 @@ __global__ void prepareWeightedNormalsForVoting_impl(DEMDataDT* granData, // Extract the area (double) from contactPointGeometryB (stored as float3) float3 areaStorage = granData->contactPointGeometryB[myContactID]; double area = float3StorageToDouble(areaStorage); + float3 penStorage = granData->contactPointGeometryA[myContactID]; + double penetration = float3StorageToDouble(penStorage); + penetration = (penetration > DEME_TINY_FLOAT) ? 
penetration : DEME_TINY_FLOAT; + double recipPen = 1.0 / penetration; // Compute weighted normal (normal * area) - weightedNormals[idx] = make_float3(normal.x * area, normal.y * area, normal.z * area); + // Note that fake contacts do not affect as their area is 0 + weightedNormals[idx] = make_float3((double)normal.x * area * recipPen, (double)normal.y * area * recipPen, + (double)normal.z * area * recipPen); // Store area for reduction areas[idx] = area; @@ -170,23 +176,19 @@ void prepareWeightedNormalsForVoting(DEMDataDT* granData, // Assumes uniqueKeys are sorted (CUB's ReduceByKey maintains sort order) // Uses contactPairs_t keys (geomToPatchMap values) __global__ void normalizeAndScatterVotedNormals_impl(float3* votedWeightedNormals, - double* totalAreas, float3* output, contactPairs_t count) { contactPairs_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < count) { - float3 votedNormal = make_float3(0, 0, 0); - double totalArea = totalAreas[idx]; - if (totalArea > 0.0) { - // Normalize by dividing by total area (use reciprocal multiplication for efficiency) - double invTotalArea = 1.0 / totalArea; - votedNormal.x = votedWeightedNormals[idx].x * invTotalArea; - votedNormal.y = votedWeightedNormals[idx].y * invTotalArea; - votedNormal.z = votedWeightedNormals[idx].z * invTotalArea; - // Normalization is needed, as voting by area can destroy unit length - votedNormal = normalize(votedNormal); + float3 votedNormal = votedWeightedNormals[idx]; + float len2 = length2(votedNormal); + if (len2 > 0.f) { + // Normalize votedNormal + votedNormal *= rsqrtf(len2); + } else { + // If total area is 0, set to (0,0,0) to mark no real contact + votedNormal = make_float3(0.0f, 0.0f, 0.0f); } - // else: votedNormal remains (0,0,0) // Write to output at the correct position output[idx] = votedNormal; @@ -194,14 +196,13 @@ __global__ void normalizeAndScatterVotedNormals_impl(float3* votedWeightedNormal } void normalizeAndScatterVotedNormals(float3* votedWeightedNormals, - 
double* totalAreas, float3* output, contactPairs_t count, cudaStream_t& this_stream) { size_t blocks_needed = (count + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; if (blocks_needed > 0) { normalizeAndScatterVotedNormals_impl<<>>( - votedWeightedNormals, totalAreas, output, count); + votedWeightedNormals, output, count); DEME_GPU_CALL(cudaStreamSynchronize(this_stream)); } } diff --git a/src/algorithms/DEMStaticDeviceSubroutines.h b/src/algorithms/DEMStaticDeviceSubroutines.h index 081a2b02..747e74c9 100644 --- a/src/algorithms/DEMStaticDeviceSubroutines.h +++ b/src/algorithms/DEMStaticDeviceSubroutines.h @@ -184,7 +184,6 @@ void prepareWeightedNormalsForVoting(DEMDataDT* granData, // Normalizes voted normals by total area and scatters to output // If total area is 0, output is (0,0,0) indicating no contact void normalizeAndScatterVotedNormals(float3* votedWeightedNormals, - double* totalAreas, float3* output, contactPairs_t count, cudaStream_t& this_stream); diff --git a/src/demo/CMakeLists.txt b/src/demo/CMakeLists.txt index fd6576b2..f99189c9 100644 --- a/src/demo/CMakeLists.txt +++ b/src/demo/CMakeLists.txt @@ -23,6 +23,7 @@ SET(DEMOS DEMdemo_MeshFalling DEMdemo_TestPack DEMdemo_RotatingDrum + DEMdemo_DrumCubes DEMdemo_Centrifuge DEMdemo_GameOfLife DEMdemo_BallDrop diff --git a/src/demo/DEMdemo_DrumCubes.cpp b/src/demo/DEMdemo_DrumCubes.cpp new file mode 100644 index 00000000..836c4679 --- /dev/null +++ b/src/demo/DEMdemo_DrumCubes.cpp @@ -0,0 +1,158 @@ +// Copyright (c) 2021, SBEL GPU Development Team +// Copyright (c) 2021, University of Wisconsin - Madison +// +// SPDX-License-Identifier: BSD-3-Clause + +// ============================================================================= +// Rotating drum centrifuge demo with only cube mesh particles. +// Matches the output style of DEMdemo_Centrifuge but uses 10 mm cubes +// (12-triangle mesh) inside an analytically defined cylinder and lids. 
+// ============================================================================= + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace deme; +using namespace std::filesystem; + +int main() { + DEMSolver DEMSim; + DEMSim.SetOutputFormat(OUTPUT_FORMAT::CSV); + DEMSim.SetOutputContent(OUTPUT_CONTENT::FAMILY); + DEMSim.SetNoForceRecord(); + DEMSim.SetMeshUniversalContact(true); + + auto mat_type_cube = DEMSim.LoadMaterial({{"E", 1e6}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.5}, {"Crr", 0.01}}); + auto mat_type_drum = DEMSim.LoadMaterial({{"E", 2e6}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.5}, {"Crr", 0.01}}); + DEMSim.SetMaterialPropertyPair("mu", mat_type_cube, mat_type_drum, 0.5); + + const float cube_size = 0.01f; + const float cube_density = 2600.0f; + const float cube_mass = cube_density * cube_size * cube_size * cube_size; + const float cube_moi = cube_mass * cube_size * cube_size / 6.0f; + const float half_diag = 0.5f * cube_size * std::sqrt(3.0f); + + // Load cube mesh template (12 triangles) and scale to 10 mm + auto cube_template = DEMSim.LoadMeshType((GET_DATA_PATH() / "mesh/cube.obj").string(), mat_type_cube, true, false); + cube_template->Scale(cube_size); + + // Drum definition + float3 CylCenter = make_float3(0, 0, 0); + float3 CylAxis = make_float3(0, 0, 1); + float CylRad = 0.08f; + float CylHeight = 0.2f; + float CylMass = 1.0f; + float safe_delta = 0.003f; + float IZZ = CylMass * CylRad * CylRad / 2; + float IYY = (CylMass / 12) * (3 * CylRad * CylRad + CylHeight * CylHeight); + auto Drum = DEMSim.AddExternalObject(); + // Drum->AddCylinder(CylCenter, CylAxis, CylRad, mat_type_drum, 0); + Drum->AddPlane(make_float3(CylRad, 0, 0), make_float3(-1, 0, 0), mat_type_drum); + Drum->AddPlane(make_float3(-CylRad, 0, 0), make_float3(1, 0, 0), mat_type_drum); + Drum->AddPlane(make_float3(0, CylRad, 0), make_float3(0, -1, 0), mat_type_drum); + Drum->AddPlane(make_float3(0, -CylRad, 0), make_float3(0, 1, 0), 
mat_type_drum); + Drum->SetMass(CylMass); + Drum->SetMOI(make_float3(IYY, IYY, IZZ)); + auto Drum_tracker = DEMSim.Track(Drum); + unsigned int drum_family = 100; + Drum->SetFamily(drum_family); + const float rpm = 200.0f; + const float drum_ang_vel = rpm * 2.0f * PI / 60.0f; + DEMSim.SetFamilyPrescribedAngVel(drum_family, "0", "0", to_string_with_precision(drum_ang_vel)); + auto top_bot_planes = DEMSim.AddExternalObject(); + top_bot_planes->AddPlane(make_float3(0, 0, CylHeight / 2. - safe_delta), make_float3(0, 0, -1), mat_type_drum); + top_bot_planes->AddPlane(make_float3(0, 0, -CylHeight / 2. + safe_delta), make_float3(0, 0, 1), mat_type_drum); + top_bot_planes->SetFamily(drum_family); + auto planes_tracker = DEMSim.Track(top_bot_planes); + + // Place 1000 cubes on a grid inside the drum + const unsigned int target_cubes = 1000; + float sample_radius = CylRad - half_diag - safe_delta; + float sample_halfheight = CylHeight / 2.0f - half_diag - safe_delta; + float fill_spacing = cube_size * 1.25f; // leave gap so meshes don't start in contact + std::mt19937 rng(42); + unsigned int created = 0; + for (float z = -sample_halfheight; z <= sample_halfheight && created < target_cubes; z += fill_spacing) { + for (float y = -sample_radius; y <= sample_radius && created < target_cubes; y += fill_spacing) { + for (float x = -sample_radius; x <= sample_radius && created < target_cubes; x += fill_spacing) { + if (x * x + y * y > sample_radius * sample_radius) { + continue; + } + auto cube = DEMSim.AddMeshFromTemplate(cube_template, make_float3(x, y, z)); + cube->SetFamily(1); + cube->SetMass(cube_mass); + cube->SetMOI(make_float3(cube_moi, cube_moi, cube_moi)); + cube->SetInitQuat(make_float4(0.f, 0.f, 0.f, 1.0f)); + created++; + } + } + } + std::cout << "Placed " << created << " cubes inside the drum." 
<< std::endl; + + auto max_v_finder = DEMSim.CreateInspector("max_absv"); + float max_v; + + DEMSim.InstructBoxDomainDimension(0.4, 0.4, 0.4); + float step_size = 1e-4f; + DEMSim.SetInitTimeStep(step_size); + DEMSim.SetGravitationalAcceleration(make_float3(0, 0, -9.81)); + DEMSim.SetExpandSafetyType("auto"); + DEMSim.SetExpandSafetyAdder(drum_ang_vel * CylRad); + DEMSim.Initialize(); + + path out_dir = current_path(); + out_dir /= "DemoOutput_DrumCubes"; + create_directory(out_dir); + + float time_end = 3.0f; + unsigned int fps = 20; + float frame_time = 1.0f / fps; + + std::cout << "Output at " << fps << " FPS" << std::endl; + unsigned int currframe = 0; + unsigned int curr_step = 0; + std::chrono::high_resolution_clock::time_point start = std::chrono::high_resolution_clock::now(); + for (double t = 0; t < (double)time_end; t += frame_time, curr_step++) { + std::cout << "Frame: " << currframe << std::endl; + DEMSim.ShowThreadCollaborationStats(); + char filename[100]; + sprintf(filename, "DEMdemo_output_%04d.vtk", currframe); + DEMSim.WriteMeshFile(out_dir / filename); + currframe++; + max_v = max_v_finder->GetValue(); + std::cout << "Max velocity of any point in simulation is " << max_v << std::endl; + + float3 drum_moi = Drum_tracker->MOI(); + float3 drum_acc = Drum_tracker->ContactAngAccLocal(); + float3 drum_torque = drum_acc * drum_moi; + std::cout << "Contact torque on the side walls is " << drum_torque.x << ", " << drum_torque.y << ", " + << drum_torque.z << std::endl; + + float3 force_on_BC = planes_tracker->ContactAcc() * planes_tracker->Mass(); + std::cout << "Contact force on bottom plane is " << std::abs(force_on_BC.z) << std::endl; + + DEMSim.DoDynamics(frame_time); + } + std::chrono::high_resolution_clock::time_point end = std::chrono::high_resolution_clock::now(); + std::chrono::duration time_sec = std::chrono::duration_cast>(end - start); + std::cout << (time_sec.count()) / time_end * 10.0 << " seconds (wall time) to finish 10 seconds' simulation" 
+ << std::endl; + DEMSim.ShowThreadCollaborationStats(); + DEMSim.ClearThreadCollaborationStats(); + + DEMSim.ShowTimingStats(); + std::cout << "----------------------------------------" << std::endl; + DEMSim.ShowMemStats(); + std::cout << "----------------------------------------" << std::endl; + + std::cout << "DEMdemo_DrumCubes exiting..." << std::endl; + return 0; +} \ No newline at end of file diff --git a/src/kernel/DEMKinematicMisc.cu b/src/kernel/DEMKinematicMisc.cu index b73833f0..82c62723 100644 --- a/src/kernel/DEMKinematicMisc.cu +++ b/src/kernel/DEMKinematicMisc.cu @@ -84,10 +84,16 @@ __global__ void computeMarginFromAbsv_implTri(deme::DEMSimParams* simParams, if (penetrationMargin > simParams->capTriTriPenetration) { penetrationMargin = simParams->capTriTriPenetration; } - - granData->marginSizeTriangle[triID] = + // We hope that penetrationMargin is small, so it's absorbed into the velocity-induce margin. + // But if not, it should prevail to avoid losing contacts involving triangles inside another mesh. 
+ double finalMargin = (double)(vel * simParams->expSafetyMulti + simParams->expSafetyAdder) * (*ts) * (*maxDrift) + - penetrationMargin + granData->familyExtraMarginSize[my_family]; + granData->familyExtraMarginSize[my_family]; + // if (finalMargin < penetrationMargin) { + // finalMargin = penetrationMargin; + // } + + granData->marginSizeTriangle[triID] = finalMargin; } } From 05ce0a2a60b01be17b47c949ca8c1cf0da3a558e Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Sat, 17 Jan 2026 02:30:28 +0100 Subject: [PATCH 04/17] Fix of 2 major dT bugs - revert a critical change form "Mesh_Particles_Json" merge 6a0357c - revert a dT Force patch calc optimization to be improved later on --- src/DEM/dT.cpp | 164 +++++++++---- src/algorithms/DEMCubInstantiations.cu | 18 +- src/algorithms/DEMDynamicMisc.cu | 232 ++++++++---------- src/algorithms/DEMStaticDeviceSubroutines.h | 140 ++++------- src/kernel/DEMCalcForceKernels_Primitive.cu | 5 - src/kernel/DEMContactKernels_SphTri_TriTri.cu | 156 ++---------- 6 files changed, 295 insertions(+), 420 deletions(-) diff --git a/src/DEM/dT.cpp b/src/DEM/dT.cpp index ed2c3294..2eb50295 100644 --- a/src/DEM/dT.cpp +++ b/src/DEM/dT.cpp @@ -2645,35 +2645,45 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( contactPairs_t startOffsetPatch = start_count_patch.first; contactPairs_t countPatch = start_count_patch.second; + // Vote for the contact direction; voting power depends on the contact area + // This reduce-by-key operation reduces primitive-recorded force pairs into patch/convex part-based + // force pairs. All elements that share the same geomToPatchMap value vote together. 
if (countPrimitive > 0) { - contactPairs_t* keys = granData->geomToPatchMap + startOffsetPrimitive; + // Allocate temporary arrays for the voting process + float3* weightedNormals = + (float3*)solverScratchSpace.allocateTempVector("weightedNormals", countPrimitive * sizeof(float3)); + double* areas = + (double*)solverScratchSpace.allocateTempVector("areas", countPrimitive * sizeof(double)); + // Keys extracted from geomToPatchMap - these map primitives to patch pairs + contactPairs_t* keys = (contactPairs_t*)solverScratchSpace.allocateTempVector( + "votingKeys", countPrimitive * sizeof(contactPairs_t)); + // Allocate arrays for reduce-by-key results (uniqueKeys uses contactPairs_t, not patchIDPair_t) contactPairs_t* uniqueKeys = (contactPairs_t*)solverScratchSpace.allocateTempVector( "uniqueKeys", countPrimitive * sizeof(contactPairs_t)); + float3* votedWeightedNormals = (float3*)solverScratchSpace.allocateTempVector( + "votedWeightedNormals", countPrimitive * sizeof(float3)); solverScratchSpace.allocateDualStruct("numUniqueKeys"); size_t* numUniqueKeys = solverScratchSpace.getDualStructDevice("numUniqueKeys"); - // Step 1: Area-weighted normals for voting - float3* weightedNormals = - (float3*)solverScratchSpace.allocateTempVector("weightedNormals", countPrimitive * sizeof(float3)); - prepareWeightedNormalsForVoting(&granData, weightedNormals, startOffsetPrimitive, countPrimitive, - streamInfo.stream); + // Step 1: Prepare weighted normals, areas, and keys + // The kernel extracts keys from geomToPatchMap, computes weighted normals, and stores areas + prepareWeightedNormalsForVoting(&granData, weightedNormals, areas, keys, startOffsetPrimitive, + countPrimitive, streamInfo.stream); - float3* votedWeightedNormals = (float3*)solverScratchSpace.allocateTempVector( - "votedWeightedNormals", countPrimitive * sizeof(float3)); + // Step 2: Reduce-by-key for weighted normals (sum) + // The keys are geomToPatchMap values (contactPairs_t), which group primitives by patch 
pair cubSumReduceByKey(keys, uniqueKeys, weightedNormals, votedWeightedNormals, numUniqueKeys, countPrimitive, streamInfo.stream, solverScratchSpace); solverScratchSpace.finishUsingTempVector("weightedNormals"); - - // Normalize the voted normals using unique keys and scatter to patch-local storage. - float3* votedNormals = - (float3*)solverScratchSpace.allocateTempVector("votedNormals", countPatch * sizeof(float3)); + // For extra safety solverScratchSpace.syncDualStructDeviceToHost("numUniqueKeys"); size_t numUniqueKeysHost = *(solverScratchSpace.getDualStructHost("numUniqueKeys")); - normalizeAndScatterVotedNormalsFromUniqueKeys(votedWeightedNormals, uniqueKeys, votedNormals, - startOffsetPatch, numUniqueKeysHost, streamInfo.stream); - solverScratchSpace.finishUsingTempVector("votedWeightedNormals"); + // std::cout << "Keys:" << std::endl; + // displayDeviceArray(keys, countPrimitive); + // std::cout << "Unique Keys:" << std::endl; + // displayDeviceArray(uniqueKeys, numUniqueKeysHost); if (numUniqueKeysHost != countPatch) { DEME_ERROR( "Patch-based contact voting produced %zu unique patch pairs, but expected %zu pairs for " @@ -2681,40 +2691,108 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( numUniqueKeysHost, countPatch, contact_type); } - // Step 2: Fused accumulation (sum + max) in a single reduce-by-key. 
- FusedPatchAccum* primitiveAccums = (FusedPatchAccum*)solverScratchSpace.allocateTempVector( - "fusedPrimitiveAccums", countPrimitive * sizeof(FusedPatchAccum)); - computeFusedPatchContactAccumulators(&granData, votedNormals, keys, primitiveAccums, - startOffsetPrimitive, startOffsetPatch, countPrimitive, - streamInfo.stream); - - FusedPatchAccum* patchAccums = (FusedPatchAccum*)solverScratchSpace.allocateTempVector( - "fusedPatchAccums", numUniqueKeysHost * sizeof(FusedPatchAccum)); - cubSumReduceByKey(keys, uniqueKeys, primitiveAccums, patchAccums, - numUniqueKeys, countPrimitive, streamInfo.stream, - solverScratchSpace); - solverScratchSpace.finishUsingTempVector("fusedPrimitiveAccums"); - + // Step 3: Normalize the voted normals by total area and scatter back to a temp array. + float3* votedNormals = + (float3*)solverScratchSpace.allocateTempVector("votedNormals", countPatch * sizeof(float3)); + normalizeAndScatterVotedNormals(votedWeightedNormals, votedNormals, countPatch, streamInfo.stream); + solverScratchSpace.finishUsingTempVector("votedWeightedNormals"); + // displayDeviceFloat3(votedNormals, countPatch); + + // Step 4: Compute projected penetration and area for each primitive contact + // Both the penetration and area are projected onto the voted normal + // If the projected penetration becomes negative, both are set to 0 + // Reuse keys array for the reduce-by-key operation + double* projectedPenetrations = (double*)solverScratchSpace.allocateTempVector( + "projectedPenetrations", countPrimitive * sizeof(double)); + double* projectedAreas = + (double*)solverScratchSpace.allocateTempVector("projectedAreas", countPrimitive * sizeof(double)); + computeWeightedUsefulPenetration(&granData, votedNormals, keys, areas, projectedPenetrations, + projectedAreas, startOffsetPrimitive, startOffsetPatch, countPrimitive, + streamInfo.stream); + solverScratchSpace.finishUsingTempVector("areas"); + + // Step 5: Reduce-by-key to get total projected area per patch pair 
(sum) double* totalProjectedAreas = (double*)solverScratchSpace.allocateTempVector("totalProjectedAreas", countPatch * sizeof(double)); + cubSumReduceByKey(keys, uniqueKeys, projectedAreas, totalProjectedAreas, + numUniqueKeys, countPrimitive, streamInfo.stream, + solverScratchSpace); + + // Step 6: Reduce-by-key to get max projected penetration per patch pair (max). + // This result, maxProjectedPenetrations, is the max of projected penetration, aka the max pen in the + // physical overlap case, and it's not the same as maxPenetrations in step 9 which is a fallback + // primitive-derived penetration. double* maxProjectedPenetrations = (double*)solverScratchSpace.allocateTempVector( "maxProjectedPenetrations", countPatch * sizeof(double)); + cubMaxReduceByKey(keys, uniqueKeys, projectedPenetrations, + maxProjectedPenetrations, numUniqueKeys, countPrimitive, + streamInfo.stream, solverScratchSpace); + + // Step 7: Compute weighted contact points for each primitive (normal case) + // The weight is: projected_penetration * projected_area + // Reuse keys, uniqueKeys, and numUniqueKeys that are still allocated + double3* weightedContactPoints = (double3*)solverScratchSpace.allocateTempVector( + "weightedContactPoints", countPrimitive * sizeof(double3)); + double* contactWeights = + (double*)solverScratchSpace.allocateTempVector("contactWeights", countPrimitive * sizeof(double)); + computeWeightedContactPoints(&granData, weightedContactPoints, contactWeights, projectedPenetrations, + projectedAreas, startOffsetPrimitive, countPrimitive, streamInfo.stream); + solverScratchSpace.finishUsingTempVector("projectedPenetrations"); + solverScratchSpace.finishUsingTempVector("projectedAreas"); + // Reduce-by-key to get total weighted contact points per patch pair + double3* totalWeightedContactPoints = (double3*)solverScratchSpace.allocateTempVector( + "totalWeightedContactPoints", countPatch * sizeof(double3)); + double* totalContactWeights = + 
(double*)solverScratchSpace.allocateTempVector("totalContactWeights", countPatch * sizeof(double)); + cubSumReduceByKey(keys, uniqueKeys, weightedContactPoints, + totalWeightedContactPoints, numUniqueKeys, countPrimitive, + streamInfo.stream, solverScratchSpace); + cubSumReduceByKey(keys, uniqueKeys, contactWeights, totalContactWeights, + numUniqueKeys, countPrimitive, streamInfo.stream, + solverScratchSpace); + solverScratchSpace.finishUsingTempVector("weightedContactPoints"); + solverScratchSpace.finishUsingTempVector("contactWeights"); + // Compute voted contact points per patch pair by dividing by total weight double3* votedContactPoints = (double3*)solverScratchSpace.allocateTempVector("votedContactPoints", countPatch * sizeof(double3)); + computeFinalContactPointsPerPatch(totalWeightedContactPoints, totalContactWeights, votedContactPoints, + countPatch, streamInfo.stream); + solverScratchSpace.finishUsingTempVector("totalWeightedContactPoints"); + solverScratchSpace.finishUsingTempVector("totalContactWeights"); + + // Step 8: Handle zero-area patches (all primitive areas are 0) + // For these patches, we need to find the max penetration primitive and use its normal/penetration + + // 8a: Extract primitive penetrations for max-reduce + double* primitivePenetrations = (double*)solverScratchSpace.allocateTempVector( + "primitivePenetrations", countPrimitive * sizeof(double)); + extractPrimitivePenetrations(&granData, primitivePenetrations, startOffsetPrimitive, countPrimitive, + streamInfo.stream); + + // 8b: Max-negative-reduce-by-key to get max negative penetration per patch + // This finds the largest negative value (smallest absolute value among negatives) + // Positive values are treated as very negative to indicate invalid/non-physical state + double* maxPenetrations = + (double*)solverScratchSpace.allocateTempVector("maxPenetrations", countPatch * sizeof(double)); + cubMaxNegativeReduceByKey(keys, uniqueKeys, primitivePenetrations, + maxPenetrations, 
numUniqueKeys, countPrimitive, + streamInfo.stream, solverScratchSpace); + solverScratchSpace.finishUsingTempVector("primitivePenetrations"); + + // 8c: Find max-penetration primitives for zero-area patches and extract their normals, penetrations, + // and contact points float3* zeroAreaNormals = (float3*)solverScratchSpace.allocateTempVector("zeroAreaNormals", countPatch * sizeof(float3)); double* zeroAreaPenetrations = (double*)solverScratchSpace.allocateTempVector("zeroAreaPenetrations", countPatch * sizeof(double)); double3* zeroAreaContactPoints = (double3*)solverScratchSpace.allocateTempVector( "zeroAreaContactPoints", countPatch * sizeof(double3)); + findMaxPenetrationPrimitiveForZeroAreaPatches( + &granData, maxPenetrations, zeroAreaNormals, zeroAreaPenetrations, zeroAreaContactPoints, keys, + startOffsetPrimitive, startOffsetPatch, countPrimitive, streamInfo.stream); + solverScratchSpace.finishUsingTempVector("maxPenetrations"); - scatterFusedPatchAccumulators(patchAccums, uniqueKeys, totalProjectedAreas, maxProjectedPenetrations, - votedContactPoints, votedNormals, zeroAreaNormals, zeroAreaPenetrations, - zeroAreaContactPoints, startOffsetPatch, numUniqueKeysHost, - streamInfo.stream); - solverScratchSpace.finishUsingTempVector("fusedPatchAccums"); - - // Step 9d: Check if each patch has any SAT-satisfying primitive (for tri-tri contacts) + // Step 8d: Check if each patch has any SAT-satisfying primitive (for tri-tri contacts) // If no primitive satisfies SAT, the patch contact is non-physical and should use Step 9 fallback notStupidBool_t* patchHasSAT = nullptr; if (contact_type == TRIANGLE_TRIANGLE_CONTACT) { @@ -2725,11 +2803,12 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( streamInfo.stream); } - // Clean up key bookkeeping now that we're done with reductions + // Clean up keys arrays now that we're done with reductions + solverScratchSpace.finishUsingTempVector("votingKeys"); 
solverScratchSpace.finishUsingTempVector("uniqueKeys"); solverScratchSpace.finishUsingDualStruct("numUniqueKeys"); - // Step 10: Finalize patch results by combining voting with zero-area handling. + // Step 9: Finalize patch results by combining voting with zero-area handling. // If patch-based projected area is 0 (or this patch pair consists of no SAT pair), meaning no physical // contact, we use the fallback estimations (zeroArea*) of CP, penetration and areas. double* finalAreas = @@ -2754,9 +2833,7 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( solverScratchSpace.finishUsingTempVector("zeroAreaPenetrations"); solverScratchSpace.finishUsingTempVector("votedContactPoints"); solverScratchSpace.finishUsingTempVector("zeroAreaContactPoints"); - if (patchHasSAT != nullptr) { - solverScratchSpace.finishUsingTempVector("patchHasSAT"); - } + solverScratchSpace.finishUsingTempVector("patchHasSAT"); // Now we have: // - finalAreas: final contact area per patch pair (countPatch elements) @@ -2786,6 +2863,8 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( } } } + DEME_GPU_CALL(cudaStreamSynchronize(streamInfo.stream)); + // If this is a tri-tri contact, compute max penetration for kT // The max value stays on device until sendToTheirBuffer transfers it if (contact_type == TRIANGLE_TRIANGLE_CONTACT && countPatch > 0) { @@ -2794,6 +2873,9 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( cubMaxReduce(finalPenetrations.data(), &maxTriTriPenetration, countPatch, streamInfo.stream, solverScratchSpace); // No toHost() here - keep on device since host never needs it + // maxTriTriPenetration.toHost(); + // std::cout << "Max tri-tri penetration after patch-based correction: " << *maxTriTriPenetration + // << std::endl; } // Final clean up diff --git a/src/algorithms/DEMCubInstantiations.cu b/src/algorithms/DEMCubInstantiations.cu index 86380234..3c93680d 100644 --- a/src/algorithms/DEMCubInstantiations.cu +++ 
b/src/algorithms/DEMCubInstantiations.cu @@ -97,23 +97,7 @@ template void cubSumReduceByKey(contactPairs_t* d_keys_i size_t n, cudaStream_t& this_stream, DEMSolverScratchData& scratchPad); -// Patch contact accumulators (sum + max) with contactPairs_t keys -template void cubSumReduceByKey(contactPairs_t* d_keys_in, - contactPairs_t* d_unique_out, - PatchContactAccum* d_vals_in, - PatchContactAccum* d_aggregates_out, - size_t* d_num_out, - size_t n, - cudaStream_t& this_stream, - DEMSolverScratchData& scratchPad); -template void cubSumReduceByKey(contactPairs_t* d_keys_in, - contactPairs_t* d_unique_out, - FusedPatchAccum* d_vals_in, - FusedPatchAccum* d_aggregates_out, - size_t* d_num_out, - size_t n, - cudaStream_t& this_stream, - DEMSolverScratchData& scratchPad); + //////////////////////////////////////////////////////////////////////////////// // Reduce::Max diff --git a/src/algorithms/DEMDynamicMisc.cu b/src/algorithms/DEMDynamicMisc.cu index 007cb3e8..43143cbe 100644 --- a/src/algorithms/DEMDynamicMisc.cu +++ b/src/algorithms/DEMDynamicMisc.cu @@ -119,10 +119,12 @@ void getContactForcesConcerningOwners(float3* d_points, // Patch-based voting kernels for mesh contact correction //////////////////////////////////////////////////////////////////////////////// -// Kernel to compute weighted normals (normal * area) for voting. -// Keys are read directly from geomToPatchMap on the fly, so only weightedNormals need to be written here. 
+// Kernel to compute weighted normals (normal * area) for voting +// Also prepares the area values for reduction and extracts the keys (geomToPatchMap values) __global__ void prepareWeightedNormalsForVoting_impl(DEMDataDT* granData, float3* weightedNormals, + double* areas, + contactPairs_t* keys, contactPairs_t startOffset, contactPairs_t count) { contactPairs_t idx = blockIdx.x * blockDim.x + threadIdx.x; @@ -135,54 +137,70 @@ __global__ void prepareWeightedNormalsForVoting_impl(DEMDataDT* granData, // Extract the area (double) from contactPointGeometryB (stored as float3) float3 areaStorage = granData->contactPointGeometryB[myContactID]; double area = float3StorageToDouble(areaStorage); + float3 penStorage = granData->contactPointGeometryA[myContactID]; + double penetration = float3StorageToDouble(penStorage); + penetration = (penetration > DEME_TINY_FLOAT) ? penetration : DEME_TINY_FLOAT; + double recipPen = 1.0 / penetration; // Compute weighted normal (normal * area) - weightedNormals[idx] = make_float3(normal.x * area, normal.y * area, normal.z * area); + // Note that fake contacts do not affect as their area is 0 + weightedNormals[idx] = make_float3((double)normal.x * area * recipPen, (double)normal.y * area * recipPen, + (double)normal.z * area * recipPen); + + // Store area for reduction + areas[idx] = area; + + // Extract key from geomToPatchMap + keys[idx] = granData->geomToPatchMap[myContactID]; } } void prepareWeightedNormalsForVoting(DEMDataDT* granData, float3* weightedNormals, + double* areas, + contactPairs_t* keys, contactPairs_t startOffset, contactPairs_t count, cudaStream_t& this_stream) { size_t blocks_needed = (count + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; if (blocks_needed > 0) { prepareWeightedNormalsForVoting_impl<<>>( - granData, weightedNormals, startOffset, count); + granData, weightedNormals, areas, keys, startOffset, count); } } -// Kernel to normalize voted normals and scatter them based on unique keys. 
-// Uses uniqueKeys (geomToPatchMap) to locate the patch slot, removing the need for total area arrays. -__global__ void normalizeAndScatterVotedNormalsFromUniqueKeys_impl(float3* votedWeightedNormals, - contactPairs_t* uniqueKeys, - float3* output, - contactPairs_t startOffsetPatch, - contactPairs_t count) { +// Kernel to normalize the voted normals by dividing by total area and scatter to output +// If total area is 0, set result to (0,0,0) +// Assumes uniqueKeys are sorted (CUB's ReduceByKey maintains sort order) +// Uses contactPairs_t keys (geomToPatchMap values) +__global__ void normalizeAndScatterVotedNormals_impl(float3* votedWeightedNormals, + float3* output, + contactPairs_t count) { contactPairs_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < count) { - contactPairs_t patchIdx = uniqueKeys[idx]; - contactPairs_t localIdx = patchIdx - startOffsetPatch; - float3 votedNormal = votedWeightedNormals[idx]; - float len2 = votedNormal.x * votedNormal.x + votedNormal.y * votedNormal.y + votedNormal.z * votedNormal.z; - // normalize when length is non-zero; otherwise leave zero vector - output[localIdx] = (len2 > 0.f) ? 
normalize(votedNormal) : make_float3(0, 0, 0); + float len2 = length2(votedNormal); + if (len2 > 0.f) { + // Normalize votedNormal + votedNormal *= rsqrtf(len2); + } else { + // If total area is 0, set to (0,0,0) to mark no real contact + votedNormal = make_float3(0.0f, 0.0f, 0.0f); + } + + // Write to output at the correct position + output[idx] = votedNormal; } } -void normalizeAndScatterVotedNormalsFromUniqueKeys(float3* votedWeightedNormals, - contactPairs_t* uniqueKeys, - float3* output, - contactPairs_t startOffsetPatch, - contactPairs_t count, - cudaStream_t& this_stream) { +void normalizeAndScatterVotedNormals(float3* votedWeightedNormals, + float3* output, + contactPairs_t count, + cudaStream_t& this_stream) { size_t blocks_needed = (count + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; if (blocks_needed > 0) { - normalizeAndScatterVotedNormalsFromUniqueKeys_impl<<>>(votedWeightedNormals, uniqueKeys, output, - startOffsetPatch, count); + normalizeAndScatterVotedNormals_impl<<>>( + votedWeightedNormals, output, count); } } @@ -190,125 +208,91 @@ void normalizeAndScatterVotedNormalsFromUniqueKeys(float3* votedWeightedNormals, // Penetration depth computation kernels for mesh contact correction //////////////////////////////////////////////////////////////////////////////// -// Kernel to compute per-primitive patch accumulators (projected area, max projected penetration, weighted CP sum). -__global__ void computeFusedPatchContactAccumulators_impl(DEMDataDT* granData, - float3* votedNormals, - const contactPairs_t* keys, - FusedPatchAccum* accumulators, - contactPairs_t startOffsetPrimitive, - contactPairs_t startOffsetPatch, - contactPairs_t count) { +// Kernel to compute weighted useful penetration for each primitive contact +// The "useful" penetration is the original penetration projected onto the voted normal. +// If the projection makes penetration negative (tangential contact), it's clamped to 0. 
+// Each primitive's useful penetration is then weighted by its contact area. +__global__ void computeWeightedUsefulPenetration_impl(DEMDataDT* granData, + float3* votedNormals, + contactPairs_t* keys, + double* areas, + double* projectedPenetrations, + double* projectedAreas, + contactPairs_t startOffsetPrimitive, + contactPairs_t startOffsetPatch, + contactPairs_t count) { contactPairs_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < count) { contactPairs_t myContactID = startOffsetPrimitive + idx; + // Get the patch pair index for this primitive (absolute index) contactPairs_t patchIdx = keys[idx]; + + // Get the voted normalized normal for this patch pair + // Subtract startOffsetPatch to get the local index into votedNormals contactPairs_t localPatchIdx = patchIdx - startOffsetPatch; float3 votedNormal = votedNormals[localPatchIdx]; + // If voted normal is (0,0,0), meaning all primitive contacts agree on no contact, then the end result must be + // 0, no special handling needed + // Get the original contact normal (stored in contactForces during primitive force calc) float3 originalNormal = granData->contactForces[myContactID]; + + // Get the original penetration depth from contactPointGeometryA (stored as double in float3) float3 penetrationStorage = granData->contactPointGeometryA[myContactID]; - double rawPenetration = float3StorageToDouble(penetrationStorage); - double clampedPenetration = (rawPenetration > 0.0) ? rawPenetration : 0.0; + double originalPenetration = float3StorageToDouble(penetrationStorage); + // Negative penetration does not participate + if (originalPenetration <= 0.0) { + originalPenetration = 0.0; + } - double area = float3StorageToDouble(granData->contactPointGeometryB[myContactID]); + // Get the contact area from storage that is not yet freed. Note the index is idx not myContactID, as areas is a + // type-specific vector. 
+ double area = areas[idx]; + // Compute the projected penetration and area by projecting onto the voted normal + // Projected penetration: originalPenetration * dot(originalNormal, votedNormal) + // Projected area: area * dot(originalNormal, votedNormal) + // If dot product is negative (opposite directions), set both to 0 float dotProduct = dot(originalNormal, votedNormal); - double cospos = (dotProduct > 0.f) ? (double)dotProduct : 0.0; + double projectedPenetration = originalPenetration * (double)dotProduct; + double projectedArea = area * (double)dotProduct; - double projectedPenetration = clampedPenetration * cospos; - double projectedArea = area * cospos; - double weight = projectedPenetration * projectedArea; - - double3 contactPoint = to_double3(granData->contactTorque_convToForce[myContactID]); - double3 weightedCP = make_double3(contactPoint.x * weight, contactPoint.y * weight, contactPoint.z * weight); - - FusedPatchAccum acc; - acc.sumProjArea = projectedArea; - acc.maxProjPen = projectedPenetration; - acc.sumWeight = weight; - acc.sumWeightedCP = weightedCP; - acc.sumWeightedNormal = - make_float3(originalNormal.x * area, originalNormal.y * area, originalNormal.z * area); - acc.maxPenRaw = rawPenetration; - acc.maxPenNormal = originalNormal; - acc.maxPenCP = contactPoint; - accumulators[idx] = acc; - } -} - -void computeFusedPatchContactAccumulators(DEMDataDT* granData, - float3* votedNormals, - const contactPairs_t* keys, - FusedPatchAccum* accumulators, - contactPairs_t startOffsetPrimitive, - contactPairs_t startOffsetPatch, - contactPairs_t count, - cudaStream_t& this_stream) { - size_t blocks_needed = (count + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; - if (blocks_needed > 0) { - computeFusedPatchContactAccumulators_impl<<>>( - granData, votedNormals, keys, accumulators, startOffsetPrimitive, startOffsetPatch, count); - } -} - -// Kernel to scatter reduced patch accumulators to the final arrays expected by patch-based 
correction. -__global__ void scatterFusedPatchAccumulators_impl(const FusedPatchAccum* accumulators, - const contactPairs_t* uniqueKeys, - double* totalProjectedAreas, - double* maxProjectedPenetrations, - double3* votedContactPoints, - float3* votedNormals, - float3* zeroAreaNormals, - double* zeroAreaPenetrations, - double3* zeroAreaContactPoints, - contactPairs_t startOffsetPatch, - contactPairs_t count) { - contactPairs_t idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < count) { - contactPairs_t patchIdx = uniqueKeys[idx]; - contactPairs_t localIdx = patchIdx - startOffsetPatch; - - FusedPatchAccum acc = accumulators[idx]; - totalProjectedAreas[localIdx] = acc.sumProjArea; - maxProjectedPenetrations[localIdx] = acc.maxProjPen; - - float3 summedNormal = acc.sumWeightedNormal; - float len2 = summedNormal.x * summedNormal.x + summedNormal.y * summedNormal.y + summedNormal.z * summedNormal.z; - votedNormals[localIdx] = (len2 > 0.f) ? normalize(summedNormal) : make_float3(0, 0, 0); - - if (acc.sumWeight > 0.0) { - double invWeight = 1.0 / acc.sumWeight; - votedContactPoints[localIdx] = - make_double3(acc.sumWeightedCP.x * invWeight, acc.sumWeightedCP.y * invWeight, - acc.sumWeightedCP.z * invWeight); - } else { - votedContactPoints[localIdx] = make_double3(0.0, 0.0, 0.0); + // If projected values becomes negative, set both area and penetration to 0 + if (projectedPenetration <= 0.0) { + projectedPenetration = 0.0; } + if (projectedArea <= 0.0) { + projectedArea = 0.0; + } + + projectedPenetrations[idx] = projectedPenetration; + projectedAreas[idx] = projectedArea; - zeroAreaNormals[localIdx] = acc.maxPenNormal; - zeroAreaPenetrations[localIdx] = (acc.maxPenRaw < 0.0) ? 
acc.maxPenRaw : -DEME_HUGE_FLOAT; - zeroAreaContactPoints[localIdx] = acc.maxPenCP; + // printf( + // "voted normal: (%f, %f, %f), original normal: (%f, %f, %f), original pen: %f, dot: %f, projected pen: %f, + // " "area: %f, projected area: %f\n", votedNormal.x, votedNormal.y, votedNormal.z, originalNormal.x, + // originalNormal.y, originalNormal.z, originalPenetration, dotProduct, projectedPenetration, area, + // projectedArea); } } -void scatterFusedPatchAccumulators(const FusedPatchAccum* accumulators, - const contactPairs_t* uniqueKeys, - double* totalProjectedAreas, - double* maxProjectedPenetrations, - double3* votedContactPoints, - float3* votedNormals, - float3* zeroAreaNormals, - double* zeroAreaPenetrations, - double3* zeroAreaContactPoints, - contactPairs_t startOffsetPatch, - contactPairs_t count, - cudaStream_t& this_stream) { +void computeWeightedUsefulPenetration(DEMDataDT* granData, + float3* votedNormals, + contactPairs_t* keys, + double* areas, + double* projectedPenetrations, + double* projectedAreas, + contactPairs_t startOffsetPrimitive, + contactPairs_t startOffsetPatch, + contactPairs_t count, + cudaStream_t& this_stream) { size_t blocks_needed = (count + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; if (blocks_needed > 0) { - scatterFusedPatchAccumulators_impl<<>>( - accumulators, uniqueKeys, totalProjectedAreas, maxProjectedPenetrations, votedContactPoints, votedNormals, - zeroAreaNormals, zeroAreaPenetrations, zeroAreaContactPoints, startOffsetPatch, count); + computeWeightedUsefulPenetration_impl<<>>( + granData, votedNormals, keys, areas, projectedPenetrations, projectedAreas, startOffsetPrimitive, + startOffsetPatch, count); + } } diff --git a/src/algorithms/DEMStaticDeviceSubroutines.h b/src/algorithms/DEMStaticDeviceSubroutines.h index 9748f4e8..87bac353 100644 --- a/src/algorithms/DEMStaticDeviceSubroutines.h +++ b/src/algorithms/DEMStaticDeviceSubroutines.h @@ -172,114 +172,56 @@ void 
getContactForcesConcerningOwners(float3* d_points, // Patch-based voting wrappers for mesh contact correction //////////////////////////////////////////////////////////////////////////////// -// Prepares weighted normals (normal * area) for voting +// Prepares weighted normals (normal * area), areas, and keys from geomToPatchMap for voting void prepareWeightedNormalsForVoting(DEMDataDT* granData, float3* weightedNormals, + double* areas, + contactPairs_t* keys, contactPairs_t startOffset, contactPairs_t count, cudaStream_t& this_stream); -// Normalize voted normals using unique patch keys and scatter to the local patch array -void normalizeAndScatterVotedNormalsFromUniqueKeys(float3* votedWeightedNormals, - contactPairs_t* uniqueKeys, - float3* output, +// Normalizes voted normals by total area and scatters to output +// If total area is 0, output is (0,0,0) indicating no contact +void normalizeAndScatterVotedNormals(float3* votedWeightedNormals, + float3* output, + contactPairs_t count, + cudaStream_t& this_stream); + +// Computes projected penetration and area for each primitive contact +// Both the penetration and area are projected onto the voted normal +// If the projected penetration becomes negative, both are set to 0 +void computeWeightedUsefulPenetration(DEMDataDT* granData, + float3* votedNormals, + contactPairs_t* keys, + double* areas, + double* projectedPenetrations, + double* projectedAreas, + contactPairs_t startOffsetPrimitive, + contactPairs_t startOffsetPatch, + contactPairs_t count, + cudaStream_t& this_stream); + +// Extracts primitive penetrations from contactPointGeometryA for max-reduce operation +void extractPrimitivePenetrations(DEMDataDT* granData, + double* penetrations, + contactPairs_t startOffset, + contactPairs_t count, + cudaStream_t& this_stream); + +// Finds the primitive with max penetration for zero-area patches and extracts its normal, penetration, and contact +// point +void 
findMaxPenetrationPrimitiveForZeroAreaPatches(DEMDataDT* granData, + double* maxPenetrations, + float3* zeroAreaNormals, + double* zeroAreaPenetrations, + double3* zeroAreaContactPoints, + contactPairs_t* keys, + contactPairs_t startOffsetPrimitive, contactPairs_t startOffsetPatch, - contactPairs_t count, + contactPairs_t countPrimitive, cudaStream_t& this_stream); -// Fused accumulator carrying area-weighted normals, projected metrics, and max-penetration data. -struct FusedPatchAccum { - double sumProjArea; // sum of projected areas (>=0) - double maxProjPen; // max projected penetration (>=0) - double sumWeight; // sum of projectedPenetration*projectedArea (>=0) - double3 sumWeightedCP; // weighted contact point accumulator - float3 sumWeightedNormal; // area-weighted normal (normal*area), used for voted normal - double maxPenRaw; // raw penetration (can be negative) - float3 maxPenNormal; // normal associated with maxPenRaw - double3 maxPenCP; // contact point associated with maxPenRaw - - __host__ __device__ __forceinline__ FusedPatchAccum operator+(const FusedPatchAccum& other) const { - FusedPatchAccum out; - out.sumProjArea = sumProjArea + other.sumProjArea; - out.maxProjPen = (maxProjPen > other.maxProjPen) ? 
maxProjPen : other.maxProjPen; - out.sumWeight = sumWeight + other.sumWeight; - out.sumWeightedCP = - make_double3(sumWeightedCP.x + other.sumWeightedCP.x, sumWeightedCP.y + other.sumWeightedCP.y, - sumWeightedCP.z + other.sumWeightedCP.z); - out.sumWeightedNormal = - make_float3(sumWeightedNormal.x + other.sumWeightedNormal.x, sumWeightedNormal.y + other.sumWeightedNormal.y, - sumWeightedNormal.z + other.sumWeightedNormal.z); - - // Max-negative preference (equivalent to CubOpMaxNegative): prefer negatives closest to zero; otherwise most - // negative positive (smallest positive) - double a = maxPenRaw; - double b = other.maxPenRaw; - bool pick_other = false; - if (a < 0 && b < 0) { - pick_other = (b > a); // closer to zero negative - } else if (a < 0) { - pick_other = false; - } else if (b < 0) { - pick_other = true; - } else { - pick_other = (b < a); // both non-negative: pick smaller one - } - if (pick_other) { - out.maxPenRaw = b; - out.maxPenNormal = other.maxPenNormal; - out.maxPenCP = other.maxPenCP; - } else { - out.maxPenRaw = a; - out.maxPenNormal = maxPenNormal; - out.maxPenCP = maxPenCP; - } - return out; - } -}; - -// Compute fused per-primitive accumulators (projected metrics + max-penetration + area-weighted normal) -void computeFusedPatchContactAccumulators(DEMDataDT* granData, - float3* votedNormals, - const contactPairs_t* keys, - FusedPatchAccum* accumulators, - contactPairs_t startOffsetPrimitive, - contactPairs_t startOffsetPatch, - contactPairs_t count, - cudaStream_t& this_stream); - -// Scatter fused accumulators to patch-local arrays expected by finalizePatchResults. 
-void scatterFusedPatchAccumulators(const FusedPatchAccum* accumulators, - const contactPairs_t* uniqueKeys, - double* totalProjectedAreas, - double* maxProjectedPenetrations, - double3* votedContactPoints, - float3* votedNormals, - float3* zeroAreaNormals, - double* zeroAreaPenetrations, - double3* zeroAreaContactPoints, - contactPairs_t startOffsetPatch, - contactPairs_t count, - cudaStream_t& this_stream); - -struct PatchContactAccum { - double sumProjArea; - double maxProjPen; - double sumWeight; - double3 sumWeightedCP; - - __host__ __device__ __forceinline__ PatchContactAccum operator+(const PatchContactAccum& other) const { - PatchContactAccum out; - out.sumProjArea = sumProjArea + other.sumProjArea; - out.maxProjPen = (maxProjPen > other.maxProjPen) ? maxProjPen : other.maxProjPen; - out.sumWeight = sumWeight + other.sumWeight; - out.sumWeightedCP = - make_double3(sumWeightedCP.x + other.sumWeightedCP.x, sumWeightedCP.y + other.sumWeightedCP.y, - sumWeightedCP.z + other.sumWeightedCP.z); - return out; - } -}; - -// Computes projected penetration/area and weighted contact point accumulators per primitive // Checks if any primitive in each patch satisfies SAT (for tri-tri contacts) // Outputs a flag per patch: 1 if at least one SAT-satisfying primitive exists, 0 otherwise void checkPatchHasSATSatisfyingPrimitive(DEMDataDT* granData, diff --git a/src/kernel/DEMCalcForceKernels_Primitive.cu b/src/kernel/DEMCalcForceKernels_Primitive.cu index ce0e8851..a70286c6 100644 --- a/src/kernel/DEMCalcForceKernels_Primitive.cu +++ b/src/kernel/DEMCalcForceKernels_Primitive.cu @@ -37,9 +37,6 @@ __device__ __forceinline__ void calculatePrimitiveContactForces_impl(deme::DEMSi float AOwnerMass, ARadius, BOwnerMass, BRadius; float4 AOriQ, BOriQ; deme::materialsOffset_t bodyAMatType, bodyBMatType; - // Cache analytic entity info when B is analytical (used for on-the-fly area calc) - deme::objType_t analyticalType = deme::ANAL_OBJ_TYPE_PLANE; - float analyticalSize1 = 0.f; // 
The user-specified extra margin size (how much we should be lenient in determining `in-contact') float extraMarginSize = 0.; // Triangle A's three points are defined outside, as may be reused in B's acquisition and penetration calc. @@ -278,8 +275,6 @@ __device__ __forceinline__ void calculatePrimitiveContactForces_impl(deme::DEMSi // For analytical entity, its patch ID is just its own component ID (but myPatchID is hardly used in this // analytical case) deme::bodyID_t myPatchID = analyticalID; - analyticalType = objType[analyticalID]; - analyticalSize1 = objSize1[analyticalID]; // If B is analytical entity, its owner, relative location, material info is jitified. bodyBMatType = objMaterial[analyticalID]; BOwnerMass = objMass[analyticalID]; diff --git a/src/kernel/DEMContactKernels_SphTri_TriTri.cu b/src/kernel/DEMContactKernels_SphTri_TriTri.cu index aeca48ad..104cc6f8 100644 --- a/src/kernel/DEMContactKernels_SphTri_TriTri.cu +++ b/src/kernel/DEMContactKernels_SphTri_TriTri.cu @@ -114,97 +114,6 @@ inline __device__ void fillSharedMemSpheres(deme::DEMSimParams* simParams, radii[myThreadID] = myRadius; } -// Combined AABB overlap check and canonical bin assignment for tri-tri contacts. -// This function does TWO things in one pass to avoid redundant AABB computation: -// 1. Checks if the two prisms' AABBs overlap (early rejection if not) -// 2. Determines if this bin is the canonical bin for this triangle pair -// -// Returns: true if AABBs overlap AND this is the canonical bin to process this pair -// false otherwise (either no overlap or should be processed in another bin) -// -// CANONICAL BIN ASSIGNMENT: -// PROBLEM: Two triangles can be in many bins simultaneously. If we count this pair in -// every bin where both are present, we get massive duplication. -// -// SOLUTION: For each unique triangle pair, assign it to EXACTLY ONE bin using a deterministic -// rule that can be computed locally (without knowing all bins both triangles touch). 
-// -// APPROACH: Compute the AABB intersection of both prisms. The MINIMUM bin ID that touches -// this intersection is the canonical bin. Since both triangles must touch this intersection -// region, this bin is guaranteed to contain both triangles. -inline __device__ bool shouldProcessTriTriInThisBin(deme::DEMSimParams* simParams, - deme::binID_t currentBinID, - deme::bodyID_t triID_A, - deme::bodyID_t triID_B, - const float3& triANode1, - const float3& triANode2, - const float3& triANode3, - const float3& triBNode1, - const float3& triBNode2, - const float3& triBNode3, - const float3& triANode1_other, - const float3& triANode2_other, - const float3& triANode3_other, - const float3& triBNode1_other, - const float3& triBNode2_other, - const float3& triBNode3_other) { - (void)triID_A; - (void)triID_B; - - // Compute AABB of first prism (6 vertices) - float minX1 = fminf(fminf(fminf(triANode1.x, triANode2.x), fminf(triANode3.x, triBNode1.x)), fminf(triBNode2.x, triBNode3.x)); - float maxX1 = fmaxf(fmaxf(fmaxf(triANode1.x, triANode2.x), fmaxf(triANode3.x, triBNode1.x)), fmaxf(triBNode2.x, triBNode3.x)); - float minY1 = fminf(fminf(fminf(triANode1.y, triANode2.y), fminf(triANode3.y, triBNode1.y)), fminf(triBNode2.y, triBNode3.y)); - float maxY1 = fmaxf(fmaxf(fmaxf(triANode1.y, triANode2.y), fmaxf(triANode3.y, triBNode1.y)), fmaxf(triBNode2.y, triBNode3.y)); - float minZ1 = fminf(fminf(fminf(triANode1.z, triANode2.z), fminf(triANode3.z, triBNode1.z)), fminf(triBNode2.z, triBNode3.z)); - float maxZ1 = fmaxf(fmaxf(fmaxf(triANode1.z, triANode2.z), fmaxf(triANode3.z, triBNode1.z)), fmaxf(triBNode2.z, triBNode3.z)); - - // Compute AABB of second prism (6 vertices) - float minX2 = fminf(fminf(fminf(triANode1_other.x, triANode2_other.x), fminf(triANode3_other.x, triBNode1_other.x)), fminf(triBNode2_other.x, triBNode3_other.x)); - float maxX2 = fmaxf(fmaxf(fmaxf(triANode1_other.x, triANode2_other.x), fmaxf(triANode3_other.x, triBNode1_other.x)), fmaxf(triBNode2_other.x, 
triBNode3_other.x)); - float minY2 = fminf(fminf(fminf(triANode1_other.y, triANode2_other.y), fminf(triANode3_other.y, triBNode1_other.y)), fminf(triBNode2_other.y, triBNode3_other.y)); - float maxY2 = fmaxf(fmaxf(fmaxf(triANode1_other.y, triANode2_other.y), fmaxf(triANode3_other.y, triBNode1_other.y)), fmaxf(triBNode2_other.y, triBNode3_other.y)); - float minZ2 = fminf(fminf(fminf(triANode1_other.z, triANode2_other.z), fminf(triANode3_other.z, triBNode1_other.z)), fminf(triBNode2_other.z, triBNode3_other.z)); - float maxZ2 = fmaxf(fmaxf(fmaxf(triANode1_other.z, triANode2_other.z), fmaxf(triANode3_other.z, triBNode1_other.z)), fmaxf(triBNode2_other.z, triBNode3_other.z)); - - // EARLY REJECTION: Check AABB overlap first (avoids expensive SAT if no overlap) - const float margin = 1e-6f; - if (minX1 > maxX2 + margin || maxX1 < minX2 - margin || - minY1 > maxY2 + margin || maxY1 < minY2 - margin || - minZ1 > maxZ2 + margin || maxZ1 < minZ2 - margin) { - return false; // AABBs don't overlap, no contact possible - } - - // AABBs overlap - now check if this is the canonical bin for this pair - const float inv_binSize = (float)simParams->dyn.inv_binSize; - - // Compute AABB intersection minimum corner - float intMinX = fmaxf(minX1, minX2); - float intMinY = fmaxf(minY1, minY2); - float intMinZ = fmaxf(minZ1, minZ2); - - // Find the minimum bin ID that touches this intersection - // This is the bin containing the minimum corner of the intersection - int binIdxX = (int)floorf(intMinX * inv_binSize); - int binIdxY = (int)floorf(intMinY * inv_binSize); - int binIdxZ = (int)floorf(intMinZ * inv_binSize); - - // Clamp to valid range - binIdxX = (binIdxX >= 0) ? ((binIdxX < (int)simParams->nbX) ? binIdxX : (int)simParams->nbX - 1) : 0; - binIdxY = (binIdxY >= 0) ? ((binIdxY < (int)simParams->nbY) ? binIdxY : (int)simParams->nbY - 1) : 0; - binIdxZ = (binIdxZ >= 0) ? ((binIdxZ < (int)simParams->nbZ) ? 
binIdxZ : (int)simParams->nbZ - 1) : 0; - - deme::binID_t canonicalBin = binIDFrom3Indices( - (deme::binID_t)binIdxX, (deme::binID_t)binIdxY, (deme::binID_t)binIdxZ, - simParams->nbX, simParams->nbY, simParams->nbZ); - - // Process only if current bin is the canonical bin for this pair - return (currentBinID == canonicalBin); -} - -// Full prism-prism contact check using SAT (Separating Axis Theorem). -// NOTE: AABB overlap check is already done in shouldProcessTriTriInThisBin(), -// so we skip it here and go directly to the full SAT test. inline __device__ bool checkPrismPrismContact(deme::DEMSimParams* simParams, const float3& triANode1, const float3& triANode2, @@ -218,10 +127,7 @@ inline __device__ bool checkPrismPrismContact(deme::DEMSimParams* simParams, const float3& triBNode1_other, const float3& triBNode2_other, const float3& triBNode3_other) { - (void)simParams; // simParams not needed since AABB check moved to shouldProcessTriTriInThisBin - - // Calculate the contact point between 2 prisms using full SAT check - // AABB pre-check already done in shouldProcessTriTriInThisBin + // Calculate the contact point between 2 prisms, and return whether they are in contact bool in_contact = calc_prism_contact(triANode1, triANode2, triANode3, triBNode1, triBNode2, triBNode3, triANode1_other, triANode2_other, triANode3_other, triBNode1_other, triBNode2_other, triBNode3_other); @@ -272,7 +178,6 @@ __global__ void getNumberOfTriangleContactsEachBin(deme::DEMSimParams* simParams "run despite this, set allowance higher via SetMaxTriangleInBin before simulation starts.", blockIdx.x, nTriInBin, simParams->errOutBinTriNum); } - const deme::spheresBinTouches_t myThreadID = threadIdx.x; // But what is the index of the same binID in array activeBinIDs? Well, mapTriActBinToSphActBin comes to rescure. 
const deme::binID_t indForAcqSphInfo = mapTriActBinToSphActBin[blockIdx.x]; @@ -418,20 +323,25 @@ __global__ void getNumberOfTriangleContactsEachBin(deme::DEMSimParams* simParams continue; } - // Use canonical bin assignment to avoid duplicate tri-tri contacts across bins. - // Pass full prism (both triA and triB faces) for correct AABB computation. - if (!shouldProcessTriTriInThisBin(simParams, binID, triIDs[bodyA], triIDs[bodyB], - triANode1[bodyA], triANode2[bodyA], triANode3[bodyA], - triBNode1[bodyA], triBNode2[bodyA], triBNode3[bodyA], - triANode1[bodyB], triANode2[bodyB], triANode3[bodyB], - triBNode1[bodyB], triBNode2[bodyB], triBNode3[bodyB])) - continue; - + // Tri--tri contact does not take into account bins, as duplicates will be removed in the end bool in_contact = checkPrismPrismContact( simParams, triANode1[bodyA], triANode2[bodyA], triANode3[bodyA], triBNode1[bodyA], triBNode2[bodyA], triBNode3[bodyA], triANode1[bodyB], triANode2[bodyB], triANode3[bodyB], triBNode1[bodyB], triBNode2[bodyB], triBNode3[bodyB]); + /* + if (in_contact && (contactPntBin != binID)) { + unsigned int ZZ = binID/(simParams->nbX*simParams->nbY); + unsigned int YY = binID%(simParams->nbX*simParams->nbY)/simParams->nbX; + unsigned int XX = binID%(simParams->nbX*simParams->nbY)%simParams->nbX; + double binLocX = (XX + 0.5) * simParams->binSize; + double binLocY = (YY + 0.5) * simParams->binSize; + double binLocZ = (ZZ + 0.5) * simParams->binSize; + printf("binLoc: %f, %f, %f\n", binLocX, binLocY, binLocZ); + printf("triANode1A: %f, %f, %f\n", triANode1[bodyA].x, triANode1[bodyA].y, triANode1[bodyA].z); + } + */ + if (in_contact) { atomicAdd(&blockTriTriPairCnt, 1); } @@ -468,15 +378,7 @@ __global__ void getNumberOfTriangleContactsEachBin(deme::DEMSimParams* simParams continue; } - // Use canonical bin assignment to avoid duplicate tri-tri contacts across bins. - // Pass full prism (both triA and triB faces) for correct AABB computation. 
- if (!shouldProcessTriTriInThisBin(simParams, binID, triIDs[myThreadID], cur_bodyID, - triANode1[myThreadID], triANode2[myThreadID], triANode3[myThreadID], - triBNode1[myThreadID], triBNode2[myThreadID], triBNode3[myThreadID], - cur_triANode1, cur_triANode2, cur_triANode3, - cur_triBNode1, cur_triBNode2, cur_triBNode3)) - continue; - + // Tri--tri contact does not take into account bins, as duplicates will be removed in the end bool in_contact = checkPrismPrismContact( simParams, triANode1[myThreadID], triANode2[myThreadID], triANode3[myThreadID], triBNode1[myThreadID], triBNode2[myThreadID], triBNode3[myThreadID], cur_triANode1, @@ -696,15 +598,7 @@ __global__ void populateTriangleContactsEachBin(deme::DEMSimParams* simParams, continue; } - // Use canonical bin assignment to avoid duplicate tri-tri contacts across bins. - // Pass full prism (both triA and triB faces) for correct AABB computation. - if (!shouldProcessTriTriInThisBin(simParams, binID, triIDs[bodyA], triIDs[bodyB], - triANode1[bodyA], triANode2[bodyA], triANode3[bodyA], - triBNode1[bodyA], triBNode2[bodyA], triBNode3[bodyA], - triANode1[bodyB], triANode2[bodyB], triANode3[bodyB], - triBNode1[bodyB], triBNode2[bodyB], triBNode3[bodyB])) - continue; - + // Tri--tri contact does not take into account bins, as duplicates will be removed in the end bool in_contact = checkPrismPrismContact( simParams, triANode1[bodyA], triANode2[bodyA], triANode3[bodyA], triBNode1[bodyA], triBNode2[bodyA], triBNode3[bodyA], triANode1[bodyB], triANode2[bodyB], triANode3[bodyB], triBNode1[bodyB], @@ -712,7 +606,8 @@ __global__ void populateTriangleContactsEachBin(deme::DEMSimParams* simParams, if (in_contact) { deme::contactPairs_t inBlockOffset = mmReportOffset + atomicAdd(&blockTriTriPairCnt, 1); - // Respect the budget-limited offset range from the scaled counts + // The chance of offset going out-of-bound is very low, lower than sph--bin CD step, but I put it + // here anyway if (inBlockOffset < mmReportOffset_end) { // 
---------------------------------------------------------------------------- // IMPORTANT NOTE: Here, we don't need to adjust A and B ids to ensure A < B, and it's @@ -773,15 +668,7 @@ __global__ void populateTriangleContactsEachBin(deme::DEMSimParams* simParams, continue; } - // Use canonical bin assignment to avoid duplicate tri-tri contacts across bins. - // Pass full prism (both triA and triB faces) for correct AABB computation. - if (!shouldProcessTriTriInThisBin(simParams, binID, triIDs[myThreadID], cur_bodyID, - triANode1[myThreadID], triANode2[myThreadID], triANode3[myThreadID], - triBNode1[myThreadID], triBNode2[myThreadID], triBNode3[myThreadID], - cur_triANode1, cur_triANode2, cur_triANode3, - cur_triBNode1, cur_triBNode2, cur_triBNode3)) - continue; - + // Tri--tri contact does not take into account bins, as duplicates will be removed in the end bool in_contact = checkPrismPrismContact( simParams, triANode1[myThreadID], triANode2[myThreadID], triANode3[myThreadID], triBNode1[myThreadID], triBNode2[myThreadID], triBNode3[myThreadID], cur_triANode1, @@ -789,7 +676,8 @@ __global__ void populateTriangleContactsEachBin(deme::DEMSimParams* simParams, if (in_contact) { deme::contactPairs_t inBlockOffset = mmReportOffset + atomicAdd(&blockTriTriPairCnt, 1); - // Respect the budget-limited offset range from the scaled counts + // The chance of offset going out-of-bound is very low, lower than sph--bin CD step, but I put + // it here anyway if (inBlockOffset < mmReportOffset_end) { deme::bodyID_t triA_ID, triB_ID; if (triIDs[myThreadID] <= cur_bodyID) { From da99f8eff1606fff5817fafbcd22f72a45c0acf0 Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Mon, 19 Jan 2026 10:30:28 +0100 Subject: [PATCH 05/17] regain some kT and dT performance - kT improved with additional precalc kernel to reduce calculating some data twice - dT improved by simplifying force calculation and added some earlier exits --- src/DEM/dT.cpp | 175 ++-- src/DEM/kT.cpp | 1 + 
src/algorithms/DEMContactDetection.cu | 58 +- src/algorithms/DEMCubInstantiations.cu | 10 +- src/algorithms/DEMDynamicMisc.cu | 185 ++++- src/algorithms/DEMStaticDeviceSubroutines.h | 69 +- src/kernel/DEMBinTriangleKernels.cu | 757 ++++++++++-------- src/kernel/DEMCalcForceKernels_Primitive.cu | 21 +- .../DEMCollisionKernels_SphTri_TriTri.cuh | 34 +- src/kernel/DEMKinematicMisc.cu | 15 +- 10 files changed, 814 insertions(+), 511 deletions(-) diff --git a/src/DEM/dT.cpp b/src/DEM/dT.cpp index 2eb50295..6f2a1622 100644 --- a/src/DEM/dT.cpp +++ b/src/DEM/dT.cpp @@ -2649,127 +2649,77 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( // This reduce-by-key operation reduces primitive-recorded force pairs into patch/convex part-based // force pairs. All elements that share the same geomToPatchMap value vote together. if (countPrimitive > 0) { - // Allocate temporary arrays for the voting process - float3* weightedNormals = - (float3*)solverScratchSpace.allocateTempVector("weightedNormals", countPrimitive * sizeof(float3)); - double* areas = - (double*)solverScratchSpace.allocateTempVector("areas", countPrimitive * sizeof(double)); - // Keys extracted from geomToPatchMap - these map primitives to patch pairs - contactPairs_t* keys = (contactPairs_t*)solverScratchSpace.allocateTempVector( - "votingKeys", countPrimitive * sizeof(contactPairs_t)); + // Keys are already available on device: geomToPatchMap maps each primitive contact to its patch pair. + // This avoids materializing an extra temporary key buffer. 
+ contactPairs_t* keys = granData->geomToPatchMap + startOffsetPrimitive; // Allocate arrays for reduce-by-key results (uniqueKeys uses contactPairs_t, not patchIDPair_t) contactPairs_t* uniqueKeys = (contactPairs_t*)solverScratchSpace.allocateTempVector( "uniqueKeys", countPrimitive * sizeof(contactPairs_t)); - float3* votedWeightedNormals = (float3*)solverScratchSpace.allocateTempVector( - "votedWeightedNormals", countPrimitive * sizeof(float3)); solverScratchSpace.allocateDualStruct("numUniqueKeys"); size_t* numUniqueKeys = solverScratchSpace.getDualStructDevice("numUniqueKeys"); - // Step 1: Prepare weighted normals, areas, and keys - // The kernel extracts keys from geomToPatchMap, computes weighted normals, and stores areas - prepareWeightedNormalsForVoting(&granData, weightedNormals, areas, keys, startOffsetPrimitive, - countPrimitive, streamInfo.stream); + // Step 1: Prepare weighted normals for voting. + // Note: the validated legacy semantics uses area/penetration weighting. + float3* weightedNormals = + (float3*)solverScratchSpace.allocateTempVector("weightedNormals", countPrimitive * sizeof(float3)); + prepareWeightedNormalsForVoting(&granData, weightedNormals, startOffsetPrimitive, countPrimitive, + streamInfo.stream); // Step 2: Reduce-by-key for weighted normals (sum) - // The keys are geomToPatchMap values (contactPairs_t), which group primitives by patch pair + // The number of patch pairs (unique keys) is expected to be countPatch. + // Using countPatch here saves scratch memory without changing semantics. 
+ float3* votedWeightedNormals = (float3*)solverScratchSpace.allocateTempVector( + "votedWeightedNormals", countPatch * sizeof(float3)); cubSumReduceByKey(keys, uniqueKeys, weightedNormals, votedWeightedNormals, numUniqueKeys, countPrimitive, streamInfo.stream, solverScratchSpace); solverScratchSpace.finishUsingTempVector("weightedNormals"); - // For extra safety - solverScratchSpace.syncDualStructDeviceToHost("numUniqueKeys"); - size_t numUniqueKeysHost = *(solverScratchSpace.getDualStructHost("numUniqueKeys")); - // std::cout << "Keys:" << std::endl; - // displayDeviceArray(keys, countPrimitive); - // std::cout << "Unique Keys:" << std::endl; - // displayDeviceArray(uniqueKeys, numUniqueKeysHost); - if (numUniqueKeysHost != countPatch) { - DEME_ERROR( - "Patch-based contact voting produced %zu unique patch pairs, but expected %zu pairs for " - "contact type %d!", - numUniqueKeysHost, countPatch, contact_type); - } - // Step 3: Normalize the voted normals by total area and scatter back to a temp array. + // Optional debug-only safety check (removed from release path for full GPU orientation). + DEME_DEBUG_EXEC({ + solverScratchSpace.syncDualStructDeviceToHost("numUniqueKeys"); + size_t numUniqueKeysHost = *(solverScratchSpace.getDualStructHost("numUniqueKeys")); + if (numUniqueKeysHost != countPatch) { + DEME_ERROR( + "Patch-based contact voting produced %zu unique patch pairs, but expected %zu pairs for " + "contact type %d!", + numUniqueKeysHost, countPatch, contact_type); + } + }); + + // Step 3: Normalize voted normals. 
float3* votedNormals = (float3*)solverScratchSpace.allocateTempVector("votedNormals", countPatch * sizeof(float3)); normalizeAndScatterVotedNormals(votedWeightedNormals, votedNormals, countPatch, streamInfo.stream); solverScratchSpace.finishUsingTempVector("votedWeightedNormals"); - // displayDeviceFloat3(votedNormals, countPatch); - - // Step 4: Compute projected penetration and area for each primitive contact - // Both the penetration and area are projected onto the voted normal - // If the projected penetration becomes negative, both are set to 0 - // Reuse keys array for the reduce-by-key operation - double* projectedPenetrations = (double*)solverScratchSpace.allocateTempVector( - "projectedPenetrations", countPrimitive * sizeof(double)); - double* projectedAreas = - (double*)solverScratchSpace.allocateTempVector("projectedAreas", countPrimitive * sizeof(double)); - computeWeightedUsefulPenetration(&granData, votedNormals, keys, areas, projectedPenetrations, - projectedAreas, startOffsetPrimitive, startOffsetPatch, countPrimitive, - streamInfo.stream); - solverScratchSpace.finishUsingTempVector("areas"); - - // Step 5: Reduce-by-key to get total projected area per patch pair (sum) - double* totalProjectedAreas = - (double*)solverScratchSpace.allocateTempVector("totalProjectedAreas", countPatch * sizeof(double)); - cubSumReduceByKey(keys, uniqueKeys, projectedAreas, totalProjectedAreas, - numUniqueKeys, countPrimitive, streamInfo.stream, - solverScratchSpace); - // Step 6: Reduce-by-key to get max projected penetration per patch pair (max). - // This result, maxProjectedPenetrations, is the max of projected penetration, aka the max pen in the - // physical overlap case, and it's not the same as maxPenetrations in step 9 which is a fallback - // primitive-derived penetration. 
- double* maxProjectedPenetrations = (double*)solverScratchSpace.allocateTempVector( - "maxProjectedPenetrations", countPatch * sizeof(double)); - cubMaxReduceByKey(keys, uniqueKeys, projectedPenetrations, - maxProjectedPenetrations, numUniqueKeys, countPrimitive, - streamInfo.stream, solverScratchSpace); - - // Step 7: Compute weighted contact points for each primitive (normal case) - // The weight is: projected_penetration * projected_area - // Reuse keys, uniqueKeys, and numUniqueKeys that are still allocated - double3* weightedContactPoints = (double3*)solverScratchSpace.allocateTempVector( - "weightedContactPoints", countPrimitive * sizeof(double3)); - double* contactWeights = - (double*)solverScratchSpace.allocateTempVector("contactWeights", countPrimitive * sizeof(double)); - computeWeightedContactPoints(&granData, weightedContactPoints, contactWeights, projectedPenetrations, - projectedAreas, startOffsetPrimitive, countPrimitive, streamInfo.stream); - solverScratchSpace.finishUsingTempVector("projectedPenetrations"); - solverScratchSpace.finishUsingTempVector("projectedAreas"); - // Reduce-by-key to get total weighted contact points per patch pair - double3* totalWeightedContactPoints = (double3*)solverScratchSpace.allocateTempVector( - "totalWeightedContactPoints", countPatch * sizeof(double3)); - double* totalContactWeights = - (double*)solverScratchSpace.allocateTempVector("totalContactWeights", countPatch * sizeof(double)); - cubSumReduceByKey(keys, uniqueKeys, weightedContactPoints, - totalWeightedContactPoints, numUniqueKeys, countPrimitive, - streamInfo.stream, solverScratchSpace); - cubSumReduceByKey(keys, uniqueKeys, contactWeights, totalContactWeights, - numUniqueKeys, countPrimitive, streamInfo.stream, - solverScratchSpace); - solverScratchSpace.finishUsingTempVector("weightedContactPoints"); - solverScratchSpace.finishUsingTempVector("contactWeights"); - // Compute voted contact points per patch pair by dividing by total weight - double3* 
votedContactPoints = - (double3*)solverScratchSpace.allocateTempVector("votedContactPoints", countPatch * sizeof(double3)); - computeFinalContactPointsPerPatch(totalWeightedContactPoints, totalContactWeights, votedContactPoints, - countPatch, streamInfo.stream); - solverScratchSpace.finishUsingTempVector("totalWeightedContactPoints"); - solverScratchSpace.finishUsingTempVector("totalContactWeights"); - - // Step 8: Handle zero-area patches (all primitive areas are 0) + // Step 4: Compute per-primitive patch accumulators (projected area, max projected penetration, + // and weighted contact-point sums) in one pass. + PatchContactAccum* primitivePatchAccumulators = (PatchContactAccum*)solverScratchSpace.allocateTempVector( + "primitivePatchAccumulators", countPrimitive * sizeof(PatchContactAccum)); + computePatchContactAccumulators(&granData, votedNormals, keys, primitivePatchAccumulators, + startOffsetPrimitive, startOffsetPatch, countPrimitive, + streamInfo.stream); + + // Step 5: Reduce-by-key accumulators to patch level (sum + max). 
+ PatchContactAccum* patchContactAccumulators = (PatchContactAccum*)solverScratchSpace.allocateTempVector( + "patchContactAccumulators", countPatch * sizeof(PatchContactAccum)); + cubSumReduceByKey( + keys, uniqueKeys, primitivePatchAccumulators, patchContactAccumulators, numUniqueKeys, countPrimitive, + streamInfo.stream, solverScratchSpace); + solverScratchSpace.finishUsingTempVector("primitivePatchAccumulators"); + + // Step 6: Handle zero-area patches (all primitive areas are 0) // For these patches, we need to find the max penetration primitive and use its normal/penetration - // 8a: Extract primitive penetrations for max-reduce + // 6a: Extract primitive penetrations for max-reduce double* primitivePenetrations = (double*)solverScratchSpace.allocateTempVector( "primitivePenetrations", countPrimitive * sizeof(double)); extractPrimitivePenetrations(&granData, primitivePenetrations, startOffsetPrimitive, countPrimitive, streamInfo.stream); - // 8b: Max-negative-reduce-by-key to get max negative penetration per patch + // 6b: Max-negative-reduce-by-key to get max negative penetration per patch // This finds the largest negative value (smallest absolute value among negatives) // Positive values are treated as very negative to indicate invalid/non-physical state double* maxPenetrations = @@ -2779,7 +2729,7 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( streamInfo.stream, solverScratchSpace); solverScratchSpace.finishUsingTempVector("primitivePenetrations"); - // 8c: Find max-penetration primitives for zero-area patches and extract their normals, penetrations, + // 6c: Find max-penetration primitives for zero-area patches and extract their normals, penetrations, // and contact points float3* zeroAreaNormals = (float3*)solverScratchSpace.allocateTempVector("zeroAreaNormals", countPatch * sizeof(float3)); @@ -2792,8 +2742,8 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( startOffsetPrimitive, startOffsetPatch, countPrimitive, 
streamInfo.stream); solverScratchSpace.finishUsingTempVector("maxPenetrations"); - // Step 8d: Check if each patch has any SAT-satisfying primitive (for tri-tri contacts) - // If no primitive satisfies SAT, the patch contact is non-physical and should use Step 9 fallback + // Step 6d: Check if each patch has any SAT-satisfying primitive (for tri-tri contacts) + // If no primitive satisfies SAT, the patch contact is non-physical and should use Step 7 fallback notStupidBool_t* patchHasSAT = nullptr; if (contact_type == TRIANGLE_TRIANGLE_CONTACT) { patchHasSAT = (notStupidBool_t*)solverScratchSpace.allocateTempVector( @@ -2803,12 +2753,7 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( streamInfo.stream); } - // Clean up keys arrays now that we're done with reductions - solverScratchSpace.finishUsingTempVector("votingKeys"); - solverScratchSpace.finishUsingTempVector("uniqueKeys"); - solverScratchSpace.finishUsingDualStruct("numUniqueKeys"); - - // Step 9: Finalize patch results by combining voting with zero-area handling. + // Step 7: Finalize patch results by combining voting with zero-area handling. // If patch-based projected area is 0 (or this patch pair consists of no SAT pair), meaning no physical // contact, we use the fallback estimations (zeroArea*) of CP, penetration and areas. 
double* finalAreas = @@ -2822,18 +2767,24 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( double3* finalContactPoints = (double3*)solverScratchSpace.allocateTempVector("finalContactPoints", countPatch * sizeof(double3)); - finalizePatchResults(totalProjectedAreas, votedNormals, maxProjectedPenetrations, votedContactPoints, - zeroAreaNormals, zeroAreaPenetrations, zeroAreaContactPoints, patchHasSAT, - finalAreas, finalNormals, finalPenetrations.data(), finalContactPoints, countPatch, - streamInfo.stream); - solverScratchSpace.finishUsingTempVector("totalProjectedAreas"); + finalizePatchResultsFromAccumulators(patchContactAccumulators, votedNormals, zeroAreaNormals, + zeroAreaPenetrations, zeroAreaContactPoints, patchHasSAT, + finalAreas, finalNormals, finalPenetrations.data(), + finalContactPoints, countPatch, streamInfo.stream); + + // Clean up temporaries no longer needed past this point. + solverScratchSpace.finishUsingTempVector("patchContactAccumulators"); solverScratchSpace.finishUsingTempVector("votedNormals"); - solverScratchSpace.finishUsingTempVector("maxProjectedPenetrations"); solverScratchSpace.finishUsingTempVector("zeroAreaNormals"); solverScratchSpace.finishUsingTempVector("zeroAreaPenetrations"); - solverScratchSpace.finishUsingTempVector("votedContactPoints"); solverScratchSpace.finishUsingTempVector("zeroAreaContactPoints"); - solverScratchSpace.finishUsingTempVector("patchHasSAT"); + if (patchHasSAT != nullptr) { + solverScratchSpace.finishUsingTempVector("patchHasSAT"); + } + + // Clean up CUB bookkeeping buffers. 
+ solverScratchSpace.finishUsingTempVector("uniqueKeys"); + solverScratchSpace.finishUsingDualStruct("numUniqueKeys"); // Now we have: // - finalAreas: final contact area per patch pair (countPatch elements) diff --git a/src/DEM/kT.cpp b/src/DEM/kT.cpp index 3c0b7aa1..ab8c824b 100644 --- a/src/DEM/kT.cpp +++ b/src/DEM/kT.cpp @@ -1271,6 +1271,7 @@ void DEMKinematicThread::prewarmKernels() { sphere_contact_kernels->kernel("getNumberOfSphereContactsEachBin").instantiate(); } if (bin_triangle_kernels) { + bin_triangle_kernels->kernel("precomputeTriangleSandwichData").instantiate(); bin_triangle_kernels->kernel("getNumberOfBinsEachTriangleTouches").instantiate(); bin_triangle_kernels->kernel("populateBinTriangleTouchingPairs").instantiate(); } diff --git a/src/algorithms/DEMContactDetection.cu b/src/algorithms/DEMContactDetection.cu index 0666c35b..5565e0ee 100644 --- a/src/algorithms/DEMContactDetection.cu +++ b/src/algorithms/DEMContactDetection.cu @@ -574,11 +574,42 @@ void contactDetection(std::shared_ptr& bin_sphere_kern numAnalGeoTriTouches = (objID_t*)scratchPad.allocateTempVector("numAnalGeoTriTouches", CD_temp_arr_bytes); } + + // Triangle prepass: compute world vertices/bounds/shift once, reuse in both sweeps. 
+ CD_temp_arr_bytes = simParams->nTriGM * sizeof(float3); + float3* tri_vA1 = (float3*)scratchPad.allocateTempVector("tri_vA1", CD_temp_arr_bytes); + float3* tri_vB1 = (float3*)scratchPad.allocateTempVector("tri_vB1", CD_temp_arr_bytes); + float3* tri_vC1 = (float3*)scratchPad.allocateTempVector("tri_vC1", CD_temp_arr_bytes); + float3* tri_vA2 = (float3*)scratchPad.allocateTempVector("tri_vA2", CD_temp_arr_bytes); + float3* tri_vB2 = (float3*)scratchPad.allocateTempVector("tri_vB2", CD_temp_arr_bytes); + float3* tri_vC2 = (float3*)scratchPad.allocateTempVector("tri_vC2", CD_temp_arr_bytes); + float3* tri_shift = (float3*)scratchPad.allocateTempVector("tri_shift", CD_temp_arr_bytes); + + CD_temp_arr_bytes = simParams->nTriGM * sizeof(int3); + int3* tri_L1 = (int3*)scratchPad.allocateTempVector("tri_L1", CD_temp_arr_bytes); + int3* tri_U1 = (int3*)scratchPad.allocateTempVector("tri_U1", CD_temp_arr_bytes); + int3* tri_L2 = (int3*)scratchPad.allocateTempVector("tri_L2", CD_temp_arr_bytes); + int3* tri_U2 = (int3*)scratchPad.allocateTempVector("tri_U2", CD_temp_arr_bytes); + + CD_temp_arr_bytes = simParams->nTriGM * sizeof(uint8_t); + uint8_t* tri_ok1 = (uint8_t*)scratchPad.allocateTempVector("tri_ok1", CD_temp_arr_bytes); + uint8_t* tri_ok2 = (uint8_t*)scratchPad.allocateTempVector("tri_ok2", CD_temp_arr_bytes); + + bin_triangle_kernels->kernel("precomputeTriangleSandwichData") + .instantiate() + .configure(dim3(blocks_needed_for_tri), dim3(DEME_NUM_TRIANGLE_PER_BLOCK), 0, this_stream) + .launch(&simParams, &granData, + tri_vA1, tri_vB1, tri_vC1, tri_vA2, tri_vB2, tri_vC2, + tri_shift, tri_L1, tri_U1, tri_L2, tri_U2, tri_ok1, tri_ok2, + sandwichANode1, sandwichANode2, sandwichANode3, + sandwichBNode1, sandwichBNode2, sandwichBNode3); + bin_triangle_kernels->kernel("getNumberOfBinsEachTriangleTouches") .instantiate() .configure(dim3(blocks_needed_for_tri), dim3(DEME_NUM_TRIANGLE_PER_BLOCK), 0, this_stream) - .launch(&simParams, &granData, numBinsTriTouches, 
numAnalGeoTriTouches, sandwichANode1, sandwichANode2, - sandwichANode3, sandwichBNode1, sandwichBNode2, sandwichBNode3, + .launch(&simParams, &granData, numBinsTriTouches, numAnalGeoTriTouches, + tri_vA1, tri_vB1, tri_vC1, tri_vA2, tri_vB2, tri_vC2, + tri_shift, tri_L1, tri_U1, tri_L2, tri_U2, tri_ok1, tri_ok2, solverFlags.meshUniversalContact); // std::cout << "numBinsTriTouches: " << std::endl; // displayDeviceArray(numBinsTriTouches, simParams->nTriGM); @@ -651,9 +682,26 @@ void contactDetection(std::shared_ptr& bin_sphere_kern bin_triangle_kernels->kernel("populateBinTriangleTouchingPairs") .instantiate() .configure(dim3(blocks_needed_for_tri), dim3(DEME_NUM_TRIANGLE_PER_BLOCK), 0, this_stream) - .launch(&simParams, &granData, numBinsTriTouchesScan, numAnalGeoTriTouchesScan, binIDsEachTriTouches, - triIDsEachBinTouches, sandwichANode1, sandwichANode2, sandwichANode3, sandwichBNode1, - sandwichBNode2, sandwichBNode3, idTriA, idGeoB, dType, solverFlags.meshUniversalContact); + .launch(&simParams, &granData, numBinsTriTouchesScan, numAnalGeoTriTouchesScan, + binIDsEachTriTouches, triIDsEachBinTouches, + tri_vA1, tri_vB1, tri_vC1, tri_vA2, tri_vB2, tri_vC2, + tri_shift, tri_L1, tri_U1, tri_L2, tri_U2, tri_ok1, tri_ok2, + idTriA, idGeoB, dType, solverFlags.meshUniversalContact); + + scratchPad.finishUsingTempVector("tri_vA1"); + scratchPad.finishUsingTempVector("tri_vB1"); + scratchPad.finishUsingTempVector("tri_vC1"); + scratchPad.finishUsingTempVector("tri_vA2"); + scratchPad.finishUsingTempVector("tri_vB2"); + scratchPad.finishUsingTempVector("tri_vC2"); + scratchPad.finishUsingTempVector("tri_shift"); + scratchPad.finishUsingTempVector("tri_L1"); + scratchPad.finishUsingTempVector("tri_U1"); + scratchPad.finishUsingTempVector("tri_L2"); + scratchPad.finishUsingTempVector("tri_U2"); + scratchPad.finishUsingTempVector("tri_ok1"); + scratchPad.finishUsingTempVector("tri_ok2"); + // std::cout << "binIDsEachTriTouches: " << std::endl; // 
displayDeviceArray(binIDsEachTriTouches, *pNumBinTriTouchPairs); // std::cout << "dType: " << std::endl; diff --git a/src/algorithms/DEMCubInstantiations.cu b/src/algorithms/DEMCubInstantiations.cu index 3c93680d..7e8d5c7f 100644 --- a/src/algorithms/DEMCubInstantiations.cu +++ b/src/algorithms/DEMCubInstantiations.cu @@ -97,7 +97,15 @@ template void cubSumReduceByKey(contactPairs_t* d_keys_i size_t n, cudaStream_t& this_stream, DEMSolverScratchData& scratchPad); - +// Patch contact accumulators (sum + max) with contactPairs_t keys +template void cubSumReduceByKey(contactPairs_t* d_keys_in, + contactPairs_t* d_unique_out, + PatchContactAccum* d_vals_in, + PatchContactAccum* d_aggregates_out, + size_t* d_num_out, + size_t n, + cudaStream_t& this_stream, + DEMSolverScratchData& scratchPad); //////////////////////////////////////////////////////////////////////////////// // Reduce::Max diff --git a/src/algorithms/DEMDynamicMisc.cu b/src/algorithms/DEMDynamicMisc.cu index 43143cbe..1e55ca7d 100644 --- a/src/algorithms/DEMDynamicMisc.cu +++ b/src/algorithms/DEMDynamicMisc.cu @@ -119,53 +119,43 @@ void getContactForcesConcerningOwners(float3* d_points, // Patch-based voting kernels for mesh contact correction //////////////////////////////////////////////////////////////////////////////// -// Kernel to compute weighted normals (normal * area) for voting +// Kernel to compute weighted normals (normal * area / penetration) for voting // Also prepares the area values for reduction and extracts the keys (geomToPatchMap values) + +// Optimized overload: prepare weighted normals only (no temporary areas/keys arrays). 
__global__ void prepareWeightedNormalsForVoting_impl(DEMDataDT* granData, - float3* weightedNormals, - double* areas, - contactPairs_t* keys, - contactPairs_t startOffset, - contactPairs_t count) { + float3* weightedNormals, + contactPairs_t startOffset, + contactPairs_t count) { contactPairs_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < count) { contactPairs_t myContactID = startOffset + idx; - // Get the contact normal from contactForces - float3 normal = granData->contactForces[myContactID]; + // Normal and geometric quantities were produced by the primitive contact kernels. + const float3 normal = granData->contactForces[myContactID]; + const float3 areaStorage = granData->contactPointGeometryB[myContactID]; + const float area = float3StorageToDouble(areaStorage); - // Extract the area (double) from contactPointGeometryB (stored as float3) - float3 areaStorage = granData->contactPointGeometryB[myContactID]; - double area = float3StorageToDouble(areaStorage); - float3 penStorage = granData->contactPointGeometryA[myContactID]; - double penetration = float3StorageToDouble(penStorage); + // Penetration is used to weight the vote (validated legacy semantics). + const float3 penStorage = granData->contactPointGeometryA[myContactID]; + float penetration = float3StorageToDouble(penStorage); penetration = (penetration > DEME_TINY_FLOAT) ? 
penetration : DEME_TINY_FLOAT; - double recipPen = 1.0 / penetration; - - // Compute weighted normal (normal * area) - // Note that fake contacts do not affect as their area is 0 - weightedNormals[idx] = make_float3((double)normal.x * area * recipPen, (double)normal.y * area * recipPen, - (double)normal.z * area * recipPen); - - // Store area for reduction - areas[idx] = area; + const float weight = area / penetration; - // Extract key from geomToPatchMap - keys[idx] = granData->geomToPatchMap[myContactID]; + weightedNormals[idx] = make_float3((double)normal.x * weight, (double)normal.y * weight, + (double)normal.z * weight); } } void prepareWeightedNormalsForVoting(DEMDataDT* granData, float3* weightedNormals, - double* areas, - contactPairs_t* keys, contactPairs_t startOffset, contactPairs_t count, cudaStream_t& this_stream) { size_t blocks_needed = (count + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; if (blocks_needed > 0) { prepareWeightedNormalsForVoting_impl<<>>( - granData, weightedNormals, areas, keys, startOffset, count); + granData, weightedNormals, startOffset, count); } } @@ -204,6 +194,147 @@ void normalizeAndScatterVotedNormals(float3* votedWeightedNormals, } } +//////////////////////////////////////////////////////////////////////////////// +// Fused patch aggregation kernels (projected area, penetration, contact point) +//////////////////////////////////////////////////////////////////////////////// + +// Per-primitive accumulator generation. +// +// This replaces the former pipeline: +// computeWeightedUsefulPenetration -> ReduceByKey(sum projArea) +// ReduceByKey(max projPen) +// computeWeightedContactPoints -> ReduceByKey(sum weightedCP) -> ReduceByKey(sum weight) +// +// It produces the same patch-level quantities, but materializes only one array +// (PatchContactAccum) and performs a single ReduceByKey. 
+__global__ void computePatchContactAccumulators_impl(DEMDataDT* granData, + const float3* votedNormals, + const contactPairs_t* keys, + PatchContactAccum* accumulators, + contactPairs_t startOffsetPrimitive, + contactPairs_t startOffsetPatch, + contactPairs_t count) { + contactPairs_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < count) { + const contactPairs_t myContactID = startOffsetPrimitive + idx; + + // Map this primitive to its patch-pair index, then to local [0, countPatch) index. + const contactPairs_t patchIdx = keys[idx]; + const contactPairs_t localPatchIdx = patchIdx - startOffsetPatch; + + const float3 votedNormal = votedNormals[localPatchIdx]; + const float3 originalNormal = granData->contactForces[myContactID]; + + // Penetration depth (positive means overlap/contact); negative is non-contact and does not contribute. + const float3 penStorage = granData->contactPointGeometryA[myContactID]; + double originalPenetration = float3StorageToDouble(penStorage); + originalPenetration = (originalPenetration > 0.0) ? originalPenetration : 0.0; + + // Contact area (non-negative; fake contacts have 0 area and thus contribute 0). + const float3 areaStorage = granData->contactPointGeometryB[myContactID]; + const double area = float3StorageToDouble(areaStorage); + + // Projection factor: clamp negative dot products to 0 (tangential/opposing contributions do not participate). + const float dotProduct = dot(originalNormal, votedNormal); + const double cospos = (dotProduct > 0.f) ? 
(double)dotProduct : 0.0; + + const double projectedPenetration = originalPenetration * cospos; + const double projectedArea = area * cospos; + + const double weight = projectedPenetration * projectedArea; + + const double3 contactPoint = to_double3(granData->contactTorque_convToForce[myContactID]); + const double3 weightedCP = make_double3(contactPoint.x * weight, contactPoint.y * weight, contactPoint.z * weight); + + PatchContactAccum acc; + acc.sumProjArea = projectedArea; + acc.maxProjPen = projectedPenetration; + acc.sumWeight = weight; + acc.sumWeightedCP = weightedCP; + accumulators[idx] = acc; + } +} + +void computePatchContactAccumulators(DEMDataDT* granData, + const float3* votedNormals, + const contactPairs_t* keys, + PatchContactAccum* accumulators, + contactPairs_t startOffsetPrimitive, + contactPairs_t startOffsetPatch, + contactPairs_t count, + cudaStream_t& this_stream) { + size_t blocks_needed = (count + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; + if (blocks_needed > 0) { + computePatchContactAccumulators_impl<<>>( + granData, votedNormals, keys, accumulators, startOffsetPrimitive, startOffsetPatch, count); + } +} + +// Finalization from patch accumulators (no intermediate per-patch arrays). 
+//
+// Per-patch kernel: thread idx turns patchAccumulators[idx] into the final area, normal, penetration and
+// contact point of patch pair idx.
+//   - If the accumulated projected area is positive and the SAT flag is set (patchHasSAT == nullptr counts
+//     as set), the voted normal, the accumulated area and the max projected penetration are used, and the
+//     contact point is the weighted sum divided by the total weight (or (0,0,0) if the total weight is 0).
+//   - Otherwise the area is 0 and the normal/penetration/contact point come from the zeroArea* arrays.
+//
+// Launch layout: 1D grid of 1D blocks, one thread per patch pair; threads with index >= count do nothing.
+__global__ void finalizePatchResultsFromAccumulators_impl(const PatchContactAccum* patchAccumulators,
+                                                          const float3* votedNormals,
+                                                          const float3* zeroAreaNormals,
+                                                          const double* zeroAreaPenetrations,
+                                                          const double3* zeroAreaContactPoints,
+                                                          const notStupidBool_t* patchHasSAT,
+                                                          double* finalAreas,
+                                                          float3* finalNormals,
+                                                          double* finalPenetrations,
+                                                          double3* finalContactPoints,
+                                                          contactPairs_t count) {
+    // Form the global index in size_t so blockIdx.x * blockDim.x cannot wrap in 32-bit arithmetic.
+    const size_t globalIdx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
+    if (globalIdx >= (size_t)count) {
+        return;
+    }
+    const contactPairs_t idx = (contactPairs_t)globalIdx;
+
+    const PatchContactAccum acc = patchAccumulators[idx];
+    const double projectedArea = acc.sumProjArea;
+
+    // Default to 1 (SAT satisfied) for non-triangle-triangle contacts where patchHasSAT is null
+    const notStupidBool_t hasSAT = (patchHasSAT != nullptr) ? patchHasSAT[idx] : 1;
+
+    // Use voted results only if projectedArea > 0 AND at least one primitive satisfies SAT
+    if (projectedArea > 0.0 && hasSAT) {
+        finalAreas[idx] = projectedArea;
+        finalNormals[idx] = votedNormals[idx];
+        finalPenetrations[idx] = acc.maxProjPen;
+
+        if (acc.sumWeight > 0.0) {
+            const double invW = 1.0 / acc.sumWeight;
+            finalContactPoints[idx] = make_double3(acc.sumWeightedCP.x * invW,
+                                                   acc.sumWeightedCP.y * invW,
+                                                   acc.sumWeightedCP.z * invW);
+        } else {
+            // If total weight is 0, contact point is set to (0,0,0)
+            finalContactPoints[idx] = make_double3(0.0, 0.0, 0.0);
+        }
+    } else {
+        // Zero-area case OR no SAT-satisfying primitives: fallback to max-penetration primitive's results
+        finalAreas[idx] = 0.0;
+        finalNormals[idx] = zeroAreaNormals[idx];
+        finalPenetrations[idx] = zeroAreaPenetrations[idx];
+        finalContactPoints[idx] = zeroAreaContactPoints[idx];
+    }
+}
+
+// Host wrapper: launches finalizePatchResultsFromAccumulators_impl on this_stream with one thread per patch
+// pair. Nothing is launched when count is 0. The launch is asynchronous; this wrapper neither synchronizes
+// the stream nor checks for launch errors.
+void finalizePatchResultsFromAccumulators(const PatchContactAccum* patchAccumulators,
+                                          const float3* votedNormals,
+                                          const float3* zeroAreaNormals,
+                                          const double* zeroAreaPenetrations,
+                                          const double3* zeroAreaContactPoints,
+                                          const notStupidBool_t* patchHasSAT,
+                                          double* finalAreas,
+                                          float3* finalNormals,
+                                          double* finalPenetrations,
+                                          double3* finalContactPoints,
+                                          contactPairs_t count,
+                                          cudaStream_t& this_stream) {
+    // Ceil-division in size_t so that count + (block size - 1) cannot wrap in contactPairs_t.
+    const size_t blocks_needed = ((size_t)count + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK;
+    if (blocks_needed > 0) {
+        finalizePatchResultsFromAccumulators_impl<<<dim3((unsigned int)blocks_needed),
+                                                    dim3(DEME_MAX_THREADS_PER_BLOCK), 0, this_stream>>>(
+            patchAccumulators, votedNormals, zeroAreaNormals, zeroAreaPenetrations, zeroAreaContactPoints, patchHasSAT,
+            finalAreas, finalNormals, finalPenetrations, finalContactPoints, count);
+    }
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Penetration depth computation kernels for mesh contact correction
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/algorithms/DEMStaticDeviceSubroutines.h b/src/algorithms/DEMStaticDeviceSubroutines.h
index 87bac353..d81697b1 100644
--- a/src/algorithms/DEMStaticDeviceSubroutines.h
+++ b/src/algorithms/DEMStaticDeviceSubroutines.h
@@ -172,7 +172,11 @@ void getContactForcesConcerningOwners(float3* d_points,
 // Patch-based voting wrappers for mesh contact correction
 ////////////////////////////////////////////////////////////////////////////////
 
-// Prepares weighted normals (normal * area), areas, and keys from geomToPatchMap for voting
+// Prepares weighted normals (normal * area / penetration) for voting.
+//
+// The weighted normal magnitude represents the voting power. The subsequent normalization step only
+// needs the *direction*, therefore any positive scalar multiple of the weight yields the same
+// voted direction. The current implementation follows the existing, validated semantics.
 void prepareWeightedNormalsForVoting(DEMDataDT* granData,
                                      float3* weightedNormals,
                                      double* areas,
@@ -181,6 +185,16 @@ void prepareWeightedNormalsForVoting(DEMDataDT* granData,
                                      contactPairs_t count,
                                      cudaStream_t& this_stream);
 
+// Optimized overload: prepares weighted normals only.
+//
+// This avoids materializing temporary areas/keys buffers. Keys can be sourced directly from
+// granData->geomToPatchMap + startOffsetPrimitive in the caller.
+void prepareWeightedNormalsForVoting(DEMDataDT* granData,
+                                     float3* weightedNormals,
+                                     contactPairs_t startOffset,
+                                     contactPairs_t count,
+                                     cudaStream_t& this_stream);
+
 // Normalizes voted normals by total area and scatters to output
 // If total area is 0, output is (0,0,0) indicating no contact
 void normalizeAndScatterVotedNormals(float3* votedWeightedNormals,
@@ -188,6 +202,59 @@ void normalizeAndScatterVotedNormals(float3* votedWeightedNormals,
                                      contactPairs_t count,
                                      cudaStream_t& this_stream);
 
+// Patch-level accumulator used to fuse multiple ReduceByKey passes.
+//
+// The reduction operator is component-wise associative (sum + max), therefore it can safely be used
+// with CUB ReduceByKey.
+struct PatchContactAccum {
+    double sumProjArea;     ///< Sum of projected contact areas (per patch)
+    double maxProjPen;      ///< Max projected penetration (per patch)
+    double sumWeight;       ///< Sum of weights w = projectedPenetration * projectedArea (per patch)
+    double3 sumWeightedCP;  ///< Sum of (contactPoint * w) (per patch)
+
+    __host__ __device__ __forceinline__ PatchContactAccum operator+(const PatchContactAccum& other) const {
+        PatchContactAccum out;
+        out.sumProjArea = sumProjArea + other.sumProjArea;
+        out.maxProjPen = (maxProjPen > other.maxProjPen) ? maxProjPen : other.maxProjPen;
+        out.sumWeight = sumWeight + other.sumWeight;
+        out.sumWeightedCP = make_double3(sumWeightedCP.x + other.sumWeightedCP.x,
+                                         sumWeightedCP.y + other.sumWeightedCP.y,
+                                         sumWeightedCP.z + other.sumWeightedCP.z);
+        return out;
+    }
+};
+
+// Computes per-primitive patch accumulators:
+//  - sumProjArea: projected area contribution
+//  - maxProjPen: projected penetration contribution (to be reduced by max)
+//  - sumWeight: weight contribution (for contact point averaging)
+//  - sumWeightedCP: weighted contact point contribution
+void computePatchContactAccumulators(DEMDataDT* granData,
+                                     const float3* votedNormals,
+                                     const contactPairs_t* keys,
+                                     PatchContactAccum* accumulators,
+                                     contactPairs_t startOffsetPrimitive,
+                                     contactPairs_t startOffsetPatch,
+                                     contactPairs_t count,
+                                     cudaStream_t& this_stream);
+
+// Finalizes patch results by combining patch-accumulator voting with zero-area / SAT-fail fallback.
+//
+// Semantics match finalizePatchResults(), but avoids materializing intermediate arrays
+// (totalProjectedAreas, votedPenetrations, votedContactPoints).
+void finalizePatchResultsFromAccumulators(const PatchContactAccum* patchAccumulators,
+                                          const float3* votedNormals,
+                                          const float3* zeroAreaNormals,
+                                          const double* zeroAreaPenetrations,
+                                          const double3* zeroAreaContactPoints,
+                                          const notStupidBool_t* patchHasSAT,
+                                          double* finalAreas,
+                                          float3* finalNormals,
+                                          double* finalPenetrations,
+                                          double3* finalContactPoints,
+                                          contactPairs_t count,
+                                          cudaStream_t& this_stream);
+
 // Computes projected penetration and area for each primitive contact
 // Both the penetration and area are projected onto the voted normal
 // If the projected penetration becomes negative, both are set to 0
diff --git a/src/kernel/DEMBinTriangleKernels.cu b/src/kernel/DEMBinTriangleKernels.cu
index faf24ed6..beba2d36 100644
--- a/src/kernel/DEMBinTriangleKernels.cu
+++ b/src/kernel/DEMBinTriangleKernels.cu
@@ -145,382 +145,447 @@ inline __device__ bool figureOutNodeAndBoundingBox(deme::DEMSimParams* simParams
     return boundingBoxIntersectBinAxisBounds(L, U, vA, vB, vC, simParams);
 }
 
+
+__global__ void precomputeTriangleSandwichData(deme::DEMSimParams* simParams,
+                                               deme::DEMDataKT* granData,
+                                               // Out: world-space vertices of sandwich triangles 1 and 2
+                                               float3* vA1_all,
+                                               float3* vB1_all,
+                                               float3* vC1_all,
+                                               float3* vA2_all,
+                                               float3* vB2_all,
+                                               float3* vC2_all,
+                                               // Out: per-triangle translation taking triangle 1 to triangle 2
+                                               float3* shift_world_all,
+                                               // Out: per-triangle bin bounds (stored only if matching ok flag is 1)
+                                               int3* LA_all,
+                                               int3* UA_all,
+                                               int3* LB_all,
+                                               int3* UB_all,
+                                               // Out: validity flags (1 or 0) for triangle 1 and triangle 2
+                                               unsigned char* ok1_all,
+                                               unsigned char* ok2_all,
+                                               // In: sandwich nodes (local, as produced by makeTriangleSandwich)
+                                               float3* nodeA1,
+                                               float3* nodeB1,
+                                               float3* nodeC1,
+                                               float3* nodeA2,
+                                               float3* nodeB2,
+                                               float3* nodeC2) {
+    const deme::bodyID_t triID = blockIdx.x * blockDim.x + threadIdx.x;
+    if (triID >= simParams->nTriGM) {
+        return;  // surplus thread past the last triangle
+    }
+
+    // Evaluate both sandwich sides first; every output store comes afterwards.
+    float3 p1A, p1B, p1C, p2A, p2B, p2C;
+    deme::binID_t lo1[3], lo2[3], hi1[3], hi2[3];
+    const bool valid1 = figureOutNodeAndBoundingBox(simParams, granData, triID, p1A, p1B, p1C, lo1, hi1,
+                                                    nodeA1[triID], nodeB1[triID], nodeC1[triID]);
+    const bool valid2 = figureOutNodeAndBoundingBox(simParams, granData, triID, p2A, p2B, p2C, lo2, hi2,
+                                                    nodeA2[triID], nodeB2[triID], nodeC2[triID]);
+
+    // Side 1: vertices and flag are always stored; bounds only when the helper reported success.
+    vA1_all[triID] = p1A;
+    vB1_all[triID] = p1B;
+    vC1_all[triID] = p1C;
+    ok1_all[triID] = valid1 ? (unsigned char)1 : (unsigned char)0;
+    if (valid1) {
+        LA_all[triID] = make_int3(lo1[0], lo1[1], lo1[2]);
+        UA_all[triID] = make_int3(hi1[0], hi1[1], hi1[2]);
+    }
+
+    // Side 2: same layout as side 1.
+    vA2_all[triID] = p2A;
+    vB2_all[triID] = p2B;
+    vC2_all[triID] = p2C;
+    ok2_all[triID] = valid2 ? (unsigned char)1 : (unsigned char)0;
+    if (valid2) {
+        LB_all[triID] = make_int3(lo2[0], lo2[1], lo2[2]);
+        UB_all[triID] = make_int3(hi2[0], hi2[1], hi2[2]);
+    }
+
+    // Sandwich translation: local node difference rotated by the owner's orientation; zero if side 2 is invalid.
+    float3 shift = make_float3(0.f, 0.f, 0.f);
+    if (valid2) {
+        const deme::bodyID_t ownerID = granData->ownerTriMesh[triID];
+        const float oriW = granData->oriQw[ownerID];
+        const float oriX = granData->oriQx[ownerID];
+        const float oriY = granData->oriQy[ownerID];
+        const float oriZ = granData->oriQz[ownerID];
+        shift = make_float3(nodeA2[triID].x - nodeA1[triID].x,
+                            nodeA2[triID].y - nodeA1[triID].y,
+                            nodeA2[triID].z - nodeA1[triID].z);
+        applyOriQToVector3(shift.x, shift.y, shift.z, oriW, oriX, oriY, oriZ);
+    }
+    shift_world_all[triID] = shift;
+}
+
+// Prepass versions of the existing kernels (signature includes precomputed arrays).
__global__ void getNumberOfBinsEachTriangleTouches(deme::DEMSimParams* simParams, - deme::DEMDataKT* granData, - deme::binsTriangleTouches_t* numBinsTriTouches, - deme::objID_t* numAnalGeoTriTouches, - float3* nodeA1, - float3* nodeB1, - float3* nodeC1, - float3* nodeA2, - float3* nodeB2, - float3* nodeC2, - bool meshUniversalContact) { + deme::DEMDataKT* granData, + deme::binsTriangleTouches_t* numBinsTriTouches, + deme::objID_t* numAnalGeoTriTouches, + // precomputed + const float3* vA1_all, + const float3* vB1_all, + const float3* vC1_all, + const float3* vA2_all, + const float3* vB2_all, + const float3* vC2_all, + const float3* shift_world_all, + const int3* LA_all, + const int3* UA_all, + const int3* LB_all, + const int3* UB_all, + const unsigned char* ok1_all, + const unsigned char* ok2_all, + bool meshUniversalContact) { deme::bodyID_t triID = blockIdx.x * blockDim.x + threadIdx.x; + if (triID >= simParams->nTriGM) { + return; + } - if (triID < simParams->nTriGM) { - // 3 vertices of the triangle, in true space location but without adding LBF point (since purely voxel- and - // bin-based locations don't need that) - float3 vA1, vB1, vC1, vA2, vB2, vC2; - deme::binID_t L1[3], L2[3], U1[3], U2[3]; - const bool ok1 = figureOutNodeAndBoundingBox(simParams, granData, triID, vA1, vB1, vC1, L1, U1, nodeA1[triID], - nodeB1[triID], nodeC1[triID]); - const bool ok2 = figureOutNodeAndBoundingBox(simParams, granData, triID, vA2, vB2, vC2, L2, U2, nodeA2[triID], - nodeB2[triID], nodeC2[triID]); - - // Precompute triangle edges/normal once per triangle (translation-invariant). - // We translate vertices per-bin (v - boxCenter) before calling triBoxOverlapBinFastLocalEdgesUnionShiftFP32(). - - // If neither triangle sandwich intersects the bin grid, it cannot touch any bin. 
- if (!ok1 && !ok2) { - numBinsTriTouches[triID] = 0; - if (meshUniversalContact) { - numAnalGeoTriTouches[triID] = 0; - } - return; - } + const bool ok1 = (ok1_all[triID] != 0); + const bool ok2 = (ok2_all[triID] != 0); - // Preserve per-triangle bounds for cheap gating inside the union sweep (Option C). - // (We will overwrite L1/U1 when we merge into the union bounds.) - deme::binID_t LA[3], UA[3]; - if (ok1) { - LA[0] = L1[0]; - LA[1] = L1[1]; - LA[2] = L1[2]; - UA[0] = U1[0]; - UA[1] = U1[1]; - UA[2] = U1[2]; + if (!ok1 && !ok2) { + numBinsTriTouches[triID] = 0; + if (meshUniversalContact) { + numAnalGeoTriTouches[triID] = 0; } + return; + } - // Precompute the sandwich translation (B = A + shift_world) once per triangle, in a numerically safe way: - // compute in local coords (small numbers), then rotate into world. - float3 shift_world = make_float3(0.f, 0.f, 0.f); - if (ok2) { - const deme::bodyID_t myOwnerID_shift = granData->ownerTriMesh[triID]; - const float myOriQw_shift = granData->oriQw[myOwnerID_shift]; - const float myOriQx_shift = granData->oriQx[myOwnerID_shift]; - const float myOriQy_shift = granData->oriQy[myOwnerID_shift]; - const float myOriQz_shift = granData->oriQz[myOwnerID_shift]; - - // Use any vertex pair; for a sandwich it is a constant translation for all vertices. 
- float3 shift_local = make_float3(nodeA2[triID].x - nodeA1[triID].x, nodeA2[triID].y - nodeA1[triID].y, - nodeA2[triID].z - nodeA1[triID].z); - applyOriQToVector3(shift_local.x, shift_local.y, shift_local.z, myOriQw_shift, - myOriQx_shift, myOriQy_shift, myOriQz_shift); - shift_world = shift_local; - } + const float3 vA1 = vA1_all[triID]; + const float3 vB1 = vB1_all[triID]; + const float3 vC1 = vC1_all[triID]; + const float3 vA2 = vA2_all[triID]; + const float3 vB2 = vB2_all[triID]; + const float3 vC2 = vC2_all[triID]; + const float3 shift_world = shift_world_all[triID]; + + int3 LA = make_int3(0, 0, 0), UA = make_int3(-1, -1, -1); + int3 LB = make_int3(0, 0, 0), UB = make_int3(-1, -1, -1); + if (ok1) { + LA = LA_all[triID]; + UA = UA_all[triID]; + } + if (ok2) { + LB = LB_all[triID]; + UB = UB_all[triID]; + } - // Merge bounds (or take the valid one, if only one is valid). - if (ok1 && ok2) { - L1[0] = DEME_MIN(L1[0], L2[0]); - L1[1] = DEME_MIN(L1[1], L2[1]); - L1[2] = DEME_MIN(L1[2], L2[2]); - U1[0] = DEME_MAX(U1[0], U2[0]); - U1[1] = DEME_MAX(U1[1], U2[1]); - U1[2] = DEME_MAX(U1[2], U2[2]); - } else if (!ok1) { - L1[0] = L2[0]; - L1[1] = L2[1]; - L1[2] = L2[2]; - U1[0] = U2[0]; - U1[1] = U2[1]; - U1[2] = U2[2]; - } + // Union bounds + deme::binID_t Lx, Ly, Lz, Ux, Uy, Uz; + if (ok1 && ok2) { + Lx = (deme::binID_t)DEME_MIN(LA.x, LB.x); + Ly = (deme::binID_t)DEME_MIN(LA.y, LB.y); + Lz = (deme::binID_t)DEME_MIN(LA.z, LB.z); + Ux = (deme::binID_t)DEME_MAX(UA.x, UB.x); + Uy = (deme::binID_t)DEME_MAX(UA.y, UB.y); + Uz = (deme::binID_t)DEME_MAX(UA.z, UB.z); + } else if (ok1) { + Lx = (deme::binID_t)LA.x; + Ly = (deme::binID_t)LA.y; + Lz = (deme::binID_t)LA.z; + Ux = (deme::binID_t)UA.x; + Uy = (deme::binID_t)UA.y; + Uz = (deme::binID_t)UA.z; + } else { + Lx = (deme::binID_t)LB.x; + Ly = (deme::binID_t)LB.y; + Lz = (deme::binID_t)LB.z; + Ux = (deme::binID_t)UB.x; + Uy = (deme::binID_t)UB.y; + Uz = (deme::binID_t)UB.z; + } - unsigned int numSDsTouched = 0; - // 
Triangle may span a collection of bins... - // BTW, I don't know why Chrono::GPU had to check the so-called 3 cases, and create thread divergence like that. - // Just sweep through all potential bins and you are fine. - float BinCenter[3]; - const float binSizeF = (float)simParams->dyn.binSize; - const float binHalfSpan = binSizeF * (0.5f + (float)DEME_BIN_ENLARGE_RATIO_FOR_FACETS); - float BinHalfSizes[3] = {binHalfSpan, binHalfSpan, binHalfSpan}; - const float startX = binSizeF * (float)L1[0] + 0.5f * binSizeF; - const float startY = binSizeF * (float)L1[1] + 0.5f * binSizeF; - const float startZ = binSizeF * (float)L1[2] + 0.5f * binSizeF; - for (deme::binID_t i = L1[0], ix = 0; i <= U1[0]; i++, ix++) { - float cy0 = startY; - BinCenter[0] = startX + ix * binSizeF; - for (deme::binID_t j = L1[1]; j <= U1[1]; j++) { - float cz = startZ; - BinCenter[1] = cy0; - for (deme::binID_t k = L1[2]; k <= U1[2]; k++) { - BinCenter[2] = cz; - const float3 c = make_float3(BinCenter[0], BinCenter[1], BinCenter[2]); - // Bounds-gating, only test the triangle(s) that can possibly touch this bin. 
- const bool inA = - ok1 && (i >= LA[0] && i <= UA[0] && j >= LA[1] && j <= UA[1] && k >= LA[2] && k <= UA[2]); - const bool inB = - ok2 && (i >= L2[0] && i <= U2[0] && j >= L2[1] && j <= U2[1] && k >= L2[2] && k <= U2[2]); - if (!inA && !inB) { - continue; - } - const float3 a0 = make_float3(vA1.x - c.x, vA1.y - c.y, vA1.z - c.z); - const float3 a1 = make_float3(vB1.x - c.x, vB1.y - c.y, vB1.z - c.z); - const float3 a2 = make_float3(vC1.x - c.x, vC1.y - c.y, vC1.z - c.z); - const bool hitFast = - triBoxOverlapBinLocalEdgesUnionShiftFP32(a0, a1, a2, shift_world, binHalfSpan, inA, inB); - if (hitFast) { - numSDsTouched++; - } + unsigned int numSDsTouched = 0; + const float binSizeF = (float)simParams->dyn.binSize; + const float binHalfSpan = binSizeF * (0.5f + (float)DEME_BIN_ENLARGE_RATIO_FOR_FACETS); + const float startX = binSizeF * (float)Lx + 0.5f * binSizeF; + const float startY = binSizeF * (float)Ly + 0.5f * binSizeF; + const float startZ = binSizeF * (float)Lz + 0.5f * binSizeF; + + float BinCenter[3]; + for (deme::binID_t i = Lx, ix = 0; i <= Ux; i++, ix++) { + float cy0 = startY; + BinCenter[0] = startX + ix * binSizeF; + for (deme::binID_t j = Ly; j <= Uy; j++) { + float cz = startZ; + BinCenter[1] = cy0; + for (deme::binID_t k = Lz; k <= Uz; k++) { + BinCenter[2] = cz; + const float3 c = make_float3(BinCenter[0], BinCenter[1], BinCenter[2]); + + const bool inA = ok1 && (i >= (deme::binID_t)LA.x && i <= (deme::binID_t)UA.x && + j >= (deme::binID_t)LA.y && j <= (deme::binID_t)UA.y && + k >= (deme::binID_t)LA.z && k <= (deme::binID_t)UA.z); + const bool inB = ok2 && (i >= (deme::binID_t)LB.x && i <= (deme::binID_t)UB.x && + j >= (deme::binID_t)LB.y && j <= (deme::binID_t)UB.y && + k >= (deme::binID_t)LB.z && k <= (deme::binID_t)UB.z); + if (!inA && !inB) { cz += binSizeF; + continue; } - cy0 += binSizeF; + + const float3 a0 = make_float3(vA1.x - c.x, vA1.y - c.y, vA1.z - c.z); + const float3 a1 = make_float3(vB1.x - c.x, vB1.y - c.y, vB1.z - c.z); + const 
float3 a2 = make_float3(vC1.x - c.x, vC1.y - c.y, vC1.z - c.z); + const bool hit = triBoxOverlapBinLocalEdgesUnionShiftFP32(a0, a1, a2, shift_world, binHalfSpan, inA, inB); + if (hit) { + numSDsTouched++; + } + cz += binSizeF; } + cy0 += binSizeF; } - numBinsTriTouches[triID] = numSDsTouched; + } - // No need to do the following if meshUniversalContact is false - if (meshUniversalContact) { - // Register sphere--analytical geometry contacts - deme::objID_t contact_count = 0; - // Each triangle should also check if it overlaps with an analytical boundary-type geometry - for (deme::objID_t objB = 0; objB < simParams->nAnalGM; objB++) { - deme::bodyID_t objBOwner = objOwner[objB]; - // Grab family number from memory (not jitified: b/c family number can change frequently in a sim) - unsigned int objFamilyNum = granData->familyID[objBOwner]; - deme::bodyID_t triOwnerID = granData->ownerTriMesh[triID]; - unsigned int triFamilyNum = granData->familyID[triOwnerID]; - unsigned int maskMatID = locateMaskPair(triFamilyNum, objFamilyNum); - // If marked no contact, skip ths iteration - if (granData->familyMasks[maskMatID] != deme::DONT_PREVENT_CONTACT) { - continue; - } - float3 ownerXYZ; - voxelIDToPosition( - ownerXYZ.x, ownerXYZ.y, ownerXYZ.z, granData->voxelID[objBOwner], granData->locX[objBOwner], - granData->locY[objBOwner], granData->locZ[objBOwner], _nvXp2_, _nvYp2_, _voxelSize_, _l_); - const float ownerOriQw = granData->oriQw[objBOwner]; - const float ownerOriQx = granData->oriQx[objBOwner]; - const float ownerOriQy = granData->oriQy[objBOwner]; - const float ownerOriQz = granData->oriQz[objBOwner]; - float objBRelPosX = objRelPosX[objB]; - float objBRelPosY = objRelPosY[objB]; - float objBRelPosZ = objRelPosZ[objB]; - float objBRotX = objRotX[objB]; - float objBRotY = objRotY[objB]; - float objBRotZ = objRotZ[objB]; - applyOriQToVector3(objBRelPosX, objBRelPosY, objBRelPosZ, ownerOriQw, ownerOriQx, - ownerOriQy, ownerOriQz); - applyOriQToVector3(objBRotX, objBRotY, 
objBRotZ, ownerOriQw, ownerOriQx, - ownerOriQy, ownerOriQz); - float3 objBPosXYZ = ownerXYZ + make_float3(objBRelPosX, objBRelPosY, objBRelPosZ); - - deme::contact_t contact_type = checkTriEntityOverlapFP32( - vA1, vB1, vC1, objType[objB], objBPosXYZ, make_float3(objBRotX, objBRotY, objBRotZ), objSize1[objB], - objSize2[objB], objSize3[objB], objNormal[objB], granData->marginSizeAnalytical[objB]); - if (contact_type == deme::NOT_A_CONTACT) { - contact_type = checkTriEntityOverlapFP32(vA2, vB2, vC2, objType[objB], objBPosXYZ, - make_float3(objBRotX, objBRotY, objBRotZ), objSize1[objB], - objSize2[objB], objSize3[objB], objNormal[objB], - granData->marginSizeAnalytical[objB]); - } - // Unlike the sphere-X contact case, we do not test against family extra margin here. This may result in - // more fake contact pairs, but the efficiency in the mesh-based particle case is not our top priority - // yet. - if (contact_type == deme::TRIANGLE_ANALYTICAL_CONTACT) { - contact_count++; - } + numBinsTriTouches[triID] = numSDsTouched; + + if (meshUniversalContact) { + deme::objID_t contact_count = 0; + for (deme::objID_t objB = 0; objB < simParams->nAnalGM; objB++) { + deme::bodyID_t objBOwner = objOwner[objB]; + unsigned int objFamilyNum = granData->familyID[objBOwner]; + deme::bodyID_t triOwnerID = granData->ownerTriMesh[triID]; + unsigned int triFamilyNum = granData->familyID[triOwnerID]; + unsigned int maskMatID = locateMaskPair(triFamilyNum, objFamilyNum); + if (granData->familyMasks[maskMatID] != deme::DONT_PREVENT_CONTACT) { + continue; + } + + float3 ownerXYZ; + voxelIDToPosition( + ownerXYZ.x, ownerXYZ.y, ownerXYZ.z, granData->voxelID[objBOwner], granData->locX[objBOwner], + granData->locY[objBOwner], granData->locZ[objBOwner], _nvXp2_, _nvYp2_, _voxelSize_, _l_); + + const float ownerOriQw = granData->oriQw[objBOwner]; + const float ownerOriQx = granData->oriQx[objBOwner]; + const float ownerOriQy = granData->oriQy[objBOwner]; + const float ownerOriQz = 
granData->oriQz[objBOwner]; + + float objBRelPosX = objRelPosX[objB]; + float objBRelPosY = objRelPosY[objB]; + float objBRelPosZ = objRelPosZ[objB]; + float objBRotX = objRotX[objB]; + float objBRotY = objRotY[objB]; + float objBRotZ = objRotZ[objB]; + + applyOriQToVector3(objBRelPosX, objBRelPosY, objBRelPosZ, + ownerOriQw, ownerOriQx, ownerOriQy, ownerOriQz); + applyOriQToVector3(objBRotX, objBRotY, objBRotZ, + ownerOriQw, ownerOriQx, ownerOriQy, ownerOriQz); + + float3 objBPosXYZ = ownerXYZ + make_float3(objBRelPosX, objBRelPosY, objBRelPosZ); + + deme::contact_t contact_type = checkTriEntityOverlapFP32( + vA1, vB1, vC1, objType[objB], objBPosXYZ, make_float3(objBRotX, objBRotY, objBRotZ), objSize1[objB], + objSize2[objB], objSize3[objB], objNormal[objB], granData->marginSizeAnalytical[objB]); + + if (contact_type == deme::NOT_A_CONTACT) { + contact_type = checkTriEntityOverlapFP32( + vA2, vB2, vC2, objType[objB], objBPosXYZ, make_float3(objBRotX, objBRotY, objBRotZ), + objSize1[objB], objSize2[objB], objSize3[objB], objNormal[objB], granData->marginSizeAnalytical[objB]); + } + + if (contact_type == deme::TRIANGLE_ANALYTICAL_CONTACT) { + contact_count++; } - numAnalGeoTriTouches[triID] = contact_count; } + numAnalGeoTriTouches[triID] = contact_count; } } __global__ void populateBinTriangleTouchingPairs(deme::DEMSimParams* simParams, - deme::DEMDataKT* granData, - deme::binsTriangleTouchPairs_t* numBinsTriTouchesScan, - deme::binsTriangleTouchPairs_t* numAnalGeoTriTouchesScan, - deme::binID_t* binIDsEachTriTouches, - deme::bodyID_t* triIDsEachBinTouches, - float3* nodeA1, - float3* nodeB1, - float3* nodeC1, - float3* nodeA2, - float3* nodeB2, - float3* nodeC2, - deme::bodyID_t* idGeoA, - deme::bodyID_t* idGeoB, - deme::contact_t* contactTypePrimitive, - bool meshUniversalContact) { + deme::DEMDataKT* granData, + deme::binsTriangleTouchPairs_t* numBinsTriTouchesScan, + deme::binsTriangleTouchPairs_t* numAnalGeoTriTouchesScan, + deme::binID_t* 
binIDsEachTriTouches, + deme::bodyID_t* triIDsEachBinTouches, + // precomputed + const float3* vA1_all, + const float3* vB1_all, + const float3* vC1_all, + const float3* vA2_all, + const float3* vB2_all, + const float3* vC2_all, + const float3* shift_world_all, + const int3* LA_all, + const int3* UA_all, + const int3* LB_all, + const int3* UB_all, + const unsigned char* ok1_all, + const unsigned char* ok2_all, + // tri-anal output + deme::bodyID_t* idGeoA, + deme::bodyID_t* idGeoB, + deme::contact_t* contactTypePrimitive, + bool meshUniversalContact) { deme::bodyID_t triID = blockIdx.x * blockDim.x + threadIdx.x; - if (triID < simParams->nTriGM) { - // 3 vertices of the triangle - float3 vA1, vB1, vC1, vA2, vB2, vC2; - deme::binID_t L1[3], L2[3], U1[3], U2[3]; - const bool ok1 = figureOutNodeAndBoundingBox(simParams, granData, triID, vA1, vB1, vC1, L1, U1, nodeA1[triID], - nodeB1[triID], nodeC1[triID]); - const bool ok2 = figureOutNodeAndBoundingBox(simParams, granData, triID, vA2, vB2, vC2, L2, U2, nodeA2[triID], - nodeB2[triID], nodeC2[triID]); - - // Precompute triangle edges/normal once per triangle (translation-invariant). - // We translate vertices per-bin (v - boxCenter) before calling triBoxOverlapBinFastLocalEdgesUnionShiftFP32(). - - // If neither triangle sandwich intersects the bin grid, it cannot touch any bin. - if (!ok1 && !ok2) { - return; - } + if (triID >= simParams->nTriGM) { + return; + } - // Preserve per-triangle bounds for cheap gating inside the union sweep (Option C). - // (We will overwrite L1/U1 when we merge into the union bounds.) - deme::binID_t LA[3], UA[3]; - if (ok1) { - LA[0] = L1[0]; - LA[1] = L1[1]; - LA[2] = L1[2]; - UA[0] = U1[0]; - UA[1] = U1[1]; - UA[2] = U1[2]; - } + const bool ok1 = (ok1_all[triID] != 0); + const bool ok2 = (ok2_all[triID] != 0); - // Precompute the sandwich translation (B = A + shift_world) once per triangle, in a numerically safe way: - // compute in local coords (small numbers), then rotate into world. 
- float3 shift_world = make_float3(0.f, 0.f, 0.f); - if (ok2) { - const deme::bodyID_t myOwnerID_shift = granData->ownerTriMesh[triID]; - const float myOriQw_shift = granData->oriQw[myOwnerID_shift]; - const float myOriQx_shift = granData->oriQx[myOwnerID_shift]; - const float myOriQy_shift = granData->oriQy[myOwnerID_shift]; - const float myOriQz_shift = granData->oriQz[myOwnerID_shift]; - - // Use any vertex pair; for a sandwich it is a constant translation for all vertices. - float3 shift_local = make_float3(nodeA2[triID].x - nodeA1[triID].x, nodeA2[triID].y - nodeA1[triID].y, - nodeA2[triID].z - nodeA1[triID].z); - applyOriQToVector3(shift_local.x, shift_local.y, shift_local.z, myOriQw_shift, - myOriQx_shift, myOriQy_shift, myOriQz_shift); - shift_world = shift_local; - } + if (!ok1 && !ok2) { + return; + } - // Merge bounds (or take the valid one, if only one is valid). - if (ok1 && ok2) { - L1[0] = DEME_MIN(L1[0], L2[0]); - L1[1] = DEME_MIN(L1[1], L2[1]); - L1[2] = DEME_MIN(L1[2], L2[2]); - U1[0] = DEME_MAX(U1[0], U2[0]); - U1[1] = DEME_MAX(U1[1], U2[1]); - U1[2] = DEME_MAX(U1[2], U2[2]); - } else if (!ok1) { - L1[0] = L2[0]; - L1[1] = L2[1]; - L1[2] = L2[2]; - U1[0] = U2[0]; - U1[1] = U2[1]; - U1[2] = U2[2]; - } + const float3 vA1 = vA1_all[triID]; + const float3 vB1 = vB1_all[triID]; + const float3 vC1 = vC1_all[triID]; + const float3 vA2 = vA2_all[triID]; + const float3 vB2 = vB2_all[triID]; + const float3 vC2 = vC2_all[triID]; + const float3 shift_world = shift_world_all[triID]; + + int3 LA = make_int3(0, 0, 0), UA = make_int3(-1, -1, -1); + int3 LB = make_int3(0, 0, 0), UB = make_int3(-1, -1, -1); + if (ok1) { + LA = LA_all[triID]; + UA = UA_all[triID]; + } + if (ok2) { + LB = LB_all[triID]; + UB = UB_all[triID]; + } - deme::binsTriangleTouchPairs_t myReportOffset = numBinsTriTouchesScan[triID]; - // In case this sweep does not agree with the previous one, we need to intercept such potential segfaults - const deme::binsTriangleTouchPairs_t 
myReportOffset_end = numBinsTriTouchesScan[triID + 1]; - - // Triangle may span a collection of bins... - float BinCenter[3]; - const float binSizeF = (float)simParams->dyn.binSize; - const float binHalfSpan = binSizeF * (0.5f + (float)DEME_BIN_ENLARGE_RATIO_FOR_FACETS); - float BinHalfSizes[3] = {binHalfSpan, binHalfSpan, binHalfSpan}; - const float startX = binSizeF * (float)L1[0] + 0.5f * binSizeF; - const float startY = binSizeF * (float)L1[1] + 0.5f * binSizeF; - const float startZ = binSizeF * (float)L1[2] + 0.5f * binSizeF; - for (deme::binID_t i = L1[0], ix = 0; i <= U1[0]; i++, ix++) { - BinCenter[0] = startX + ix * binSizeF; - float cy0 = startY; - for (deme::binID_t j = L1[1]; j <= U1[1]; j++) { - BinCenter[1] = cy0; - float cz = startZ; - for (deme::binID_t k = L1[2]; k <= U1[2]; k++) { - if (myReportOffset >= myReportOffset_end) { - continue; // Don't step on the next triangle's domain - } - BinCenter[2] = cz; - const float3 c = make_float3(BinCenter[0], BinCenter[1], BinCenter[2]); - // Bounds-gating, only test the triangle(s) that can possibly touch this bin. 
- const bool inA = - ok1 && (i >= LA[0] && i <= UA[0] && j >= LA[1] && j <= UA[1] && k >= LA[2] && k <= UA[2]); - const bool inB = - ok2 && (i >= L2[0] && i <= U2[0] && j >= L2[1] && j <= U2[1] && k >= L2[2] && k <= U2[2]); - if (!inA && !inB) { - continue; - } - const float3 a0 = make_float3(vA1.x - c.x, vA1.y - c.y, vA1.z - c.z); - const float3 a1 = make_float3(vB1.x - c.x, vB1.y - c.y, vB1.z - c.z); - const float3 a2 = make_float3(vC1.x - c.x, vC1.y - c.y, vC1.z - c.z); - const bool hitFast = - triBoxOverlapBinLocalEdgesUnionShiftFP32(a0, a1, a2, shift_world, binHalfSpan, inA, inB); - if (hitFast) { - binIDsEachTriTouches[myReportOffset] = - binIDFrom3Indices(i, j, k, simParams->nbX, simParams->nbY, simParams->nbZ); - triIDsEachBinTouches[myReportOffset] = triID; - myReportOffset++; - } - cz += binSizeF; - } - cy0 += binSizeF; - } - } - // This can happen for like 1 in 10^9 chance, for the tri--bin contact algorithm has stochasticity on GPU - for (; myReportOffset < myReportOffset_end; myReportOffset++) { - binIDsEachTriTouches[myReportOffset] = deme::NULL_BINID; - triIDsEachBinTouches[myReportOffset] = triID; - } + // Union bounds + deme::binID_t Lx, Ly, Lz, Ux, Uy, Uz; + if (ok1 && ok2) { + Lx = (deme::binID_t)DEME_MIN(LA.x, LB.x); + Ly = (deme::binID_t)DEME_MIN(LA.y, LB.y); + Lz = (deme::binID_t)DEME_MIN(LA.z, LB.z); + Ux = (deme::binID_t)DEME_MAX(UA.x, UB.x); + Uy = (deme::binID_t)DEME_MAX(UA.y, UB.y); + Uz = (deme::binID_t)DEME_MAX(UA.z, UB.z); + } else if (ok1) { + Lx = (deme::binID_t)LA.x; + Ly = (deme::binID_t)LA.y; + Lz = (deme::binID_t)LA.z; + Ux = (deme::binID_t)UA.x; + Uy = (deme::binID_t)UA.y; + Uz = (deme::binID_t)UA.z; + } else { + Lx = (deme::binID_t)LB.x; + Ly = (deme::binID_t)LB.y; + Lz = (deme::binID_t)LB.z; + Ux = (deme::binID_t)UB.x; + Uy = (deme::binID_t)UB.y; + Uz = (deme::binID_t)UB.z; + } - // No need to do the following if meshUniversalContact is false - if (meshUniversalContact) { - deme::binsTriangleTouchPairs_t myTriGeoReportOffset = 
numAnalGeoTriTouchesScan[triID]; - deme::binsTriangleTouchPairs_t myTriGeoReportOffset_end = numAnalGeoTriTouchesScan[triID + 1]; - for (deme::objID_t objB = 0; objB < simParams->nAnalGM; objB++) { - deme::bodyID_t objBOwner = objOwner[objB]; - // Grab family number from memory (not jitified: b/c family number can change frequently in a sim) - unsigned int objFamilyNum = granData->familyID[objBOwner]; - deme::bodyID_t triOwnerID = granData->ownerTriMesh[triID]; - unsigned int triFamilyNum = granData->familyID[triOwnerID]; - unsigned int maskMatID = locateMaskPair(triFamilyNum, objFamilyNum); - // If marked no contact, skip ths iteration - if (granData->familyMasks[maskMatID] != deme::DONT_PREVENT_CONTACT) { + // Write tri-bin pairs + const deme::binsTriangleTouchPairs_t myReportOffset = numBinsTriTouchesScan[triID]; + const deme::binsTriangleTouchPairs_t myUpperBound = numBinsTriTouchesScan[triID + 1]; + + deme::binsTriangleTouchPairs_t count = 0; + const float binSizeF = (float)simParams->dyn.binSize; + const float binHalfSpan = binSizeF * (0.5f + (float)DEME_BIN_ENLARGE_RATIO_FOR_FACETS); + const float startX = binSizeF * (float)Lx + 0.5f * binSizeF; + const float startY = binSizeF * (float)Ly + 0.5f * binSizeF; + const float startZ = binSizeF * (float)Lz + 0.5f * binSizeF; + + float BinCenter[3]; + for (deme::binID_t i = Lx, ix = 0; i <= Ux; i++, ix++) { + float cy0 = startY; + BinCenter[0] = startX + ix * binSizeF; + for (deme::binID_t j = Ly; j <= Uy; j++) { + float cz = startZ; + BinCenter[1] = cy0; + for (deme::binID_t k = Lz; k <= Uz; k++) { + BinCenter[2] = cz; + const float3 c = make_float3(BinCenter[0], BinCenter[1], BinCenter[2]); + + const bool inA = ok1 && (i >= (deme::binID_t)LA.x && i <= (deme::binID_t)UA.x && + j >= (deme::binID_t)LA.y && j <= (deme::binID_t)UA.y && + k >= (deme::binID_t)LA.z && k <= (deme::binID_t)UA.z); + const bool inB = ok2 && (i >= (deme::binID_t)LB.x && i <= (deme::binID_t)UB.x && + j >= (deme::binID_t)LB.y && j <= 
(deme::binID_t)UB.y && + k >= (deme::binID_t)LB.z && k <= (deme::binID_t)UB.z); + if (!inA && !inB) { + cz += binSizeF; continue; } - float3 ownerXYZ; - voxelIDToPosition( - ownerXYZ.x, ownerXYZ.y, ownerXYZ.z, granData->voxelID[objBOwner], granData->locX[objBOwner], - granData->locY[objBOwner], granData->locZ[objBOwner], _nvXp2_, _nvYp2_, _voxelSize_, _l_); - const float ownerOriQw = granData->oriQw[objBOwner]; - const float ownerOriQx = granData->oriQx[objBOwner]; - const float ownerOriQy = granData->oriQy[objBOwner]; - const float ownerOriQz = granData->oriQz[objBOwner]; - float objBRelPosX = objRelPosX[objB]; - float objBRelPosY = objRelPosY[objB]; - float objBRelPosZ = objRelPosZ[objB]; - float objBRotX = objRotX[objB]; - float objBRotY = objRotY[objB]; - float objBRotZ = objRotZ[objB]; - applyOriQToVector3(objBRelPosX, objBRelPosY, objBRelPosZ, ownerOriQw, ownerOriQx, - ownerOriQy, ownerOriQz); - applyOriQToVector3(objBRotX, objBRotY, objBRotZ, ownerOriQw, ownerOriQx, - ownerOriQy, ownerOriQz); - float3 objBPosXYZ = ownerXYZ + make_float3(objBRelPosX, objBRelPosY, objBRelPosZ); - - deme::contact_t contact_type = checkTriEntityOverlapFP32( - vA1, vB1, vC1, objType[objB], objBPosXYZ, make_float3(objBRotX, objBRotY, objBRotZ), objSize1[objB], - objSize2[objB], objSize3[objB], objNormal[objB], granData->marginSizeAnalytical[objB]); - if (contact_type == deme::NOT_A_CONTACT) { - contact_type = checkTriEntityOverlapFP32(vA2, vB2, vC2, objType[objB], objBPosXYZ, - make_float3(objBRotX, objBRotY, objBRotZ), objSize1[objB], - objSize2[objB], objSize3[objB], objNormal[objB], - granData->marginSizeAnalytical[objB]); - } - // Unlike the sphere-X contact case, we do not test against family extra margin here, which is more - // lenient and perhaps makes more fake contacts. 
- if (contact_type == deme::TRIANGLE_ANALYTICAL_CONTACT) { - idGeoA[myTriGeoReportOffset] = triID; - idGeoB[myTriGeoReportOffset] = (deme::bodyID_t)objB; - contactTypePrimitive[myTriGeoReportOffset] = contact_type; - myTriGeoReportOffset++; - if (myTriGeoReportOffset >= myTriGeoReportOffset_end) { - return; // Don't step on the next triangle's domain + + const float3 a0 = make_float3(vA1.x - c.x, vA1.y - c.y, vA1.z - c.z); + const float3 a1 = make_float3(vB1.x - c.x, vB1.y - c.y, vB1.z - c.z); + const float3 a2 = make_float3(vC1.x - c.x, vC1.y - c.y, vC1.z - c.z); + const bool hit = triBoxOverlapBinLocalEdgesUnionShiftFP32(a0, a1, a2, shift_world, binHalfSpan, inA, inB); + if (hit) { + const deme::binsTriangleTouchPairs_t outIdx = myReportOffset + count; + if (outIdx < myUpperBound) { + binIDsEachTriTouches[outIdx] = binIDFrom3Indices(i, j, k, simParams->nbX, simParams->nbY, + simParams->nbZ); + triIDsEachBinTouches[outIdx] = triID; } + count++; } + + cz += binSizeF; + } + cy0 += binSizeF; + } + } + + // Tri-anal contacts: keep identical to original populate kernel + if (meshUniversalContact) { + const deme::binsTriangleTouchPairs_t myAnalOffset = numAnalGeoTriTouchesScan[triID]; + deme::binsTriangleTouchPairs_t analCount = 0; + for (deme::objID_t objB = 0; objB < simParams->nAnalGM; objB++) { + deme::bodyID_t objBOwner = objOwner[objB]; + unsigned int objFamilyNum = granData->familyID[objBOwner]; + deme::bodyID_t triOwnerID = granData->ownerTriMesh[triID]; + unsigned int triFamilyNum = granData->familyID[triOwnerID]; + unsigned int maskMatID = locateMaskPair(triFamilyNum, objFamilyNum); + if (granData->familyMasks[maskMatID] != deme::DONT_PREVENT_CONTACT) { + continue; + } + + float3 ownerXYZ; + voxelIDToPosition( + ownerXYZ.x, ownerXYZ.y, ownerXYZ.z, granData->voxelID[objBOwner], granData->locX[objBOwner], + granData->locY[objBOwner], granData->locZ[objBOwner], _nvXp2_, _nvYp2_, _voxelSize_, _l_); + + const float ownerOriQw = granData->oriQw[objBOwner]; + const 
float ownerOriQx = granData->oriQx[objBOwner]; + const float ownerOriQy = granData->oriQy[objBOwner]; + const float ownerOriQz = granData->oriQz[objBOwner]; + + float objBRelPosX = objRelPosX[objB]; + float objBRelPosY = objRelPosY[objB]; + float objBRelPosZ = objRelPosZ[objB]; + float objBRotX = objRotX[objB]; + float objBRotY = objRotY[objB]; + float objBRotZ = objRotZ[objB]; + + applyOriQToVector3(objBRelPosX, objBRelPosY, objBRelPosZ, + ownerOriQw, ownerOriQx, ownerOriQy, ownerOriQz); + applyOriQToVector3(objBRotX, objBRotY, objBRotZ, + ownerOriQw, ownerOriQx, ownerOriQy, ownerOriQz); + + float3 objBPosXYZ = ownerXYZ + make_float3(objBRelPosX, objBRelPosY, objBRelPosZ); + + deme::contact_t contact_type = checkTriEntityOverlapFP32( + vA1, vB1, vC1, objType[objB], objBPosXYZ, make_float3(objBRotX, objBRotY, objBRotZ), objSize1[objB], + objSize2[objB], objSize3[objB], objNormal[objB], granData->marginSizeAnalytical[objB]); + if (contact_type == deme::NOT_A_CONTACT) { + contact_type = checkTriEntityOverlapFP32( + vA2, vB2, vC2, objType[objB], objBPosXYZ, make_float3(objBRotX, objBRotY, objBRotZ), + objSize1[objB], objSize2[objB], objSize3[objB], objNormal[objB], granData->marginSizeAnalytical[objB]); } - // Take care of potentially unfilled slots in the report - for (; myTriGeoReportOffset < myTriGeoReportOffset_end; myTriGeoReportOffset++) { - contactTypePrimitive[myTriGeoReportOffset] = deme::NOT_A_CONTACT; + + if (contact_type == deme::TRIANGLE_ANALYTICAL_CONTACT) { + const deme::binsTriangleTouchPairs_t outIdx = myAnalOffset + analCount; + idGeoA[outIdx] = triID; + idGeoB[outIdx] = (deme::bodyID_t)objB; + contactTypePrimitive[outIdx] = contact_type; + analCount++; } } } diff --git a/src/kernel/DEMCalcForceKernels_Primitive.cu b/src/kernel/DEMCalcForceKernels_Primitive.cu index a70286c6..39754ab4 100644 --- a/src/kernel/DEMCalcForceKernels_Primitive.cu +++ b/src/kernel/DEMCalcForceKernels_Primitive.cu @@ -250,14 +250,23 @@ __device__ __forceinline__ void 
calculatePrimitiveContactForces_impl(deme::DEMSi // Use the dedicated SAT check function to determine if triangles are truly in physical contact // Note: checkTriangleTriangleOverlap uses projection which can report contact even for non-physical // "submerged" cases, so we need the actual SAT test for accurate physical contact determination - bool satisfiesSAT = checkTriangleTriangleSAT(triANode1, triANode2, triANode3, triBNode1, - triBNode2, triBNode3); - granData->contactSATSatisfied[myPrimitiveContactID] = satisfiesSAT ? 1 : 0; - - // If SAT says no physical contact potential, drop this pair (projection can report non-physical overlaps) - if (!satisfiesSAT) { + bool check_sat = true; + if (!in_contact && overlapDepth <= -extraMarginSize) { + // This pair is already beyond the extra margin, SAT cannot make it a valid contact + check_sat = false; + granData->contactSATSatisfied[myPrimitiveContactID] = 0; ContactType = deme::NOT_A_CONTACT; } + if (check_sat) { + bool satisfiesSAT = checkTriangleTriangleSAT(triANode1, triANode2, triANode3, triBNode1, + triBNode2, triBNode3); + granData->contactSATSatisfied[myPrimitiveContactID] = satisfiesSAT ? 1 : 0; + + // If SAT says no physical contact potential, drop this pair (projection can report non-physical overlaps) + if (!satisfiesSAT) { + ContactType = deme::NOT_A_CONTACT; + } + } // Fix ContactType if needed // If the solver says in contact, we do not question it diff --git a/src/kernel/DEMCollisionKernels_SphTri_TriTri.cuh b/src/kernel/DEMCollisionKernels_SphTri_TriTri.cuh index 83dfaab3..f9d55412 100644 --- a/src/kernel/DEMCollisionKernels_SphTri_TriTri.cuh +++ b/src/kernel/DEMCollisionKernels_SphTri_TriTri.cuh @@ -1149,9 +1149,8 @@ __device__ bool checkTriangleTriangleOverlap( // Triangle B vertices (tri2) const T1 triB[3] = {A2, B2, C2}; - // Compute face normals + // Compute face normal for triangle A first; triangle B normal is only needed if B->A projection hits. 
T1 nA = normalize(cross(B1 - A1, C1 - A1)); - T1 nB = normalize(cross(B2 - A2, C2 - A2)); //// TODO: And degenerated triangles? @@ -1163,17 +1162,36 @@ __device__ bool checkTriangleTriangleOverlap( // Project triangle B onto triangle A's plane and clip against A T2 depthBA, areaBA; T1 centroidBA; - bool contactBA = projectTriangleOntoTriangle(triB, triA, nA, depthBA, areaBA, centroidBA); + const bool contactBA = projectTriangleOntoTriangle(triB, triA, nA, depthBA, areaBA, centroidBA); + + if (!contactBA) { + // No contact detected, Provide separation info + T1 centA = (triA[0] + triA[1] + triA[2]) / 3.0; + T1 centB = (triB[0] + triB[1] + triB[2]) / 3.0; + T1 sep = centA - centB; + T2 sepLen2 = dot(sep, sep); + + if (sepLen2 > (DEME_TINY_FLOAT * DEME_TINY_FLOAT)) { + T2 sepLen = sqrt(sepLen2); + normal = sep / sepLen; + depth = -sepLen; // Negative for separation + point = (centA + centB) * 0.5; + } else { + normal = nA; + depth = -DEME_TINY_FLOAT; + point = centA; + } + projectedArea = 0.0; + return false; + } // Project triangle A onto triangle B's plane and clip against B + T1 nB = normalize(cross(B2 - A2, C2 - A2)); T2 depthAB, areaAB; T1 centroidAB; - bool contactAB = projectTriangleOntoTriangle(triA, triB, nB, depthAB, areaAB, centroidAB); - - // Determine if there is contact - bool inContact = contactBA && contactAB; + const bool contactAB = projectTriangleOntoTriangle(triA, triB, nB, depthAB, areaAB, centroidAB); - if (!inContact) { + if (!contactAB) { // No contact detected, Provide separation info T1 centA = (triA[0] + triA[1] + triA[2]) / 3.0; T1 centB = (triB[0] + triB[1] + triB[2]) / 3.0; diff --git a/src/kernel/DEMKinematicMisc.cu b/src/kernel/DEMKinematicMisc.cu index 8bb55e46..f0ffc9d9 100644 --- a/src/kernel/DEMKinematicMisc.cu +++ b/src/kernel/DEMKinematicMisc.cu @@ -82,16 +82,21 @@ __global__ void computeMarginFromAbsv_implTri(deme::DEMSimParams* simParams, // as our meshed particle representation is surface only, so we need to account for 
existing penetration length // in our future-proof contact detection, always. double penetrationMargin = *maxTriTriPenetration; - //// TODO: Temporary measure - penetrationMargin = 0.; // (meshUniversalContact && penetrationMargin > 0.0) ? penetrationMargin : 0.0; + penetrationMargin = (meshUniversalContact && penetrationMargin > 0.0) ? penetrationMargin : 0.0; // Clamp penetration margin to the maximum allowed value to prevent super large margins if (penetrationMargin > simParams->capTriTriPenetration) { penetrationMargin = simParams->capTriTriPenetration; } - - granData->marginSizeTriangle[triID] = + // We hope that penetrationMargin is small, so it's absorbed into the velocity-induce margin. + // But if not, it should prevail to avoid losing contacts involving triangles inside another mesh. + double finalMargin = (double)(vel * simParams->dyn.expSafetyMulti + simParams->dyn.expSafetyAdder) * (*ts) * (*maxDrift) + - penetrationMargin + granData->familyExtraMarginSize[my_family]; + granData->familyExtraMarginSize[my_family]; + // if (finalMargin < penetrationMargin) { + // finalMargin = penetrationMargin; + // } + + granData->marginSizeTriangle[triID] = finalMargin; } } From e07e44b595fe5b7bbe34a303cb77b0206f11013a Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Mon, 19 Jan 2026 23:58:44 +0100 Subject: [PATCH 06/17] Feature Add: Planar Contact Zylinder - a simple/fast plane contact but in cylindric direction --- src/DEM/APIPrivate.cpp | 5 ++ src/DEM/BdrsAndObjs.h | 50 +++++++++++++- src/DEM/Defines.h | 1 + src/demo/DEMdemo_DrumCubes.cpp | 6 +- src/kernel/DEMCollisionKernels_SphSph.cuh | 20 ++++++ .../DEMCollisionKernels_SphTri_TriTri.cuh | 69 +++++++++++++++++++ 6 files changed, 145 insertions(+), 6 deletions(-) diff --git a/src/DEM/APIPrivate.cpp b/src/DEM/APIPrivate.cpp index 7e858194..5148f130 100644 --- a/src/DEM/APIPrivate.cpp +++ b/src/DEM/APIPrivate.cpp @@ -717,6 +717,11 @@ void DEMSolver::preprocessAnalyticalObjs() { 
addAnalCompTemplate(ANAL_OBJ_TYPE_CYL_INF, comp_mat.at(i), thisLoadExtObj, param.cyl.center, param.cyl.dir, param.cyl.radius, 0, 0, param.cyl.normal); break; + case OBJ_COMPONENT::PLANAR_CYL: + addAnalCompTemplate(ANAL_OBJ_TYPE_PLANAR_CYL, comp_mat.at(i), thisLoadExtObj, param.cyl.center, + param.cyl.dir, param.cyl.radius, 0, 0, + param.cyl.normal); + break; default: DEME_ERROR(std::string("There is at least one analytical boundary that has a type not supported.")); } diff --git a/src/DEM/BdrsAndObjs.h b/src/DEM/BdrsAndObjs.h index 89ac1620..3e8b0b34 100644 --- a/src/DEM/BdrsAndObjs.h +++ b/src/DEM/BdrsAndObjs.h @@ -25,7 +25,7 @@ namespace deme { /// External object type /// Note all of them are `shell', not solid objects. If you need a solid cylinder for example, then use one CYLINDER as /// the side plus 2 CIRCLE as the ends to emulate it. Please be sure to set OUTWARD CYLINDER normal in this case. -enum class OBJ_COMPONENT { PLANE, SPHERE, PLATE, CIRCLE, CYL, CYL_INF, CONE, CONE_INF, TRIANGLE }; +enum class OBJ_COMPONENT { PLANE, SPHERE, PLATE, CIRCLE, CYL, CYL_INF, PLANAR_CYL, CONE, CONE_INF, TRIANGLE }; /// Sphere struct DEMSphereParams_t { @@ -224,6 +224,54 @@ class DEMExternObj : public DEMInitializer { assertThreeElements(axis, "AddCylinder", "axis"); AddCylinder(make_float3(pos[0], pos[1], pos[2]), make_float3(axis[0], axis[1], axis[2]), rad, material, normal); } + + /// Add a z-axis-aligned cylinder of infinite length with planar contact approximation + void AddZPlanarContactCylinder(const float3 pos, + const float rad, + const std::shared_ptr& material, + const objNormal_t normal = ENTITY_NORMAL_INWARD) { + types.push_back(OBJ_COMPONENT::PLANAR_CYL); + materials.push_back(material); + DEMAnalEntParams params; + params.cyl.center = pos; + params.cyl.radius = rad; + params.cyl.dir = make_float3(0, 0, 1); + params.cyl.normal = normal; + entity_params.push_back(params); + } + void AddZPlanarContactCylinder(const std::vector& pos, + const float rad, + const 
std::shared_ptr& material, + const objNormal_t normal = ENTITY_NORMAL_INWARD) { + assertThreeElements(pos, "AddZPlanarContactCylinder", "pos"); + AddZPlanarContactCylinder(make_float3(pos[0], pos[1], pos[2]), rad, material, normal); + } + + /// Add a cylinder of infinite length with planar contact approximation, along a user-specific axis + void AddPlanarContactCylinder(const float3 pos, + const float3 axis, + const float rad, + const std::shared_ptr& material, + const objNormal_t normal = ENTITY_NORMAL_INWARD) { + types.push_back(OBJ_COMPONENT::PLANAR_CYL); + materials.push_back(material); + DEMAnalEntParams params; + params.cyl.center = pos; + params.cyl.radius = rad; + params.cyl.dir = normalize(axis); + params.cyl.normal = normal; + entity_params.push_back(params); + } + void AddPlanarContactCylinder(const std::vector& pos, + const std::vector& axis, + const float rad, + const std::shared_ptr& material, + const objNormal_t normal = ENTITY_NORMAL_INWARD) { + assertThreeElements(pos, "AddPlanarContactCylinder", "pos"); + assertThreeElements(axis, "AddPlanarContactCylinder", "axis"); + AddPlanarContactCylinder(make_float3(pos[0], pos[1], pos[2]), make_float3(axis[0], axis[1], axis[2]), rad, + material, normal); + } }; // DEM mesh object diff --git a/src/DEM/Defines.h b/src/DEM/Defines.h index ec20710b..2a7593fa 100644 --- a/src/DEM/Defines.h +++ b/src/DEM/Defines.h @@ -134,6 +134,7 @@ constexpr contact_t ALL_CONTACT_TYPES[NUM_SUPPORTED_CONTACT_TYPES] = { const objType_t ANAL_OBJ_TYPE_PLANE = 0; const objType_t ANAL_OBJ_TYPE_PLATE = 1; const objType_t ANAL_OBJ_TYPE_CYL_INF = 2; +const objType_t ANAL_OBJ_TYPE_PLANAR_CYL = 3; const objNormal_t ENTITY_NORMAL_INWARD = 0; const objNormal_t ENTITY_NORMAL_OUTWARD = 1; diff --git a/src/demo/DEMdemo_DrumCubes.cpp b/src/demo/DEMdemo_DrumCubes.cpp index 8c680f3f..8e28b3af 100644 --- a/src/demo/DEMdemo_DrumCubes.cpp +++ b/src/demo/DEMdemo_DrumCubes.cpp @@ -54,11 +54,7 @@ int main() { float IZZ = CylMass * CylRad * CylRad / 2; 
float IYY = (CylMass / 12) * (3 * CylRad * CylRad + CylHeight * CylHeight); auto Drum = DEMSim.AddExternalObject(); - // Drum->AddCylinder(CylCenter, CylAxis, CylRad, mat_type_drum, 0); - Drum->AddPlane(make_float3(CylRad, 0, 0), make_float3(-1, 0, 0), mat_type_drum); - Drum->AddPlane(make_float3(-CylRad, 0, 0), make_float3(1, 0, 0), mat_type_drum); - Drum->AddPlane(make_float3(0, CylRad, 0), make_float3(0, -1, 0), mat_type_drum); - Drum->AddPlane(make_float3(0, -CylRad, 0), make_float3(0, 1, 0), mat_type_drum); + Drum->AddPlanarContactCylinder(CylCenter, CylAxis, CylRad, mat_type_drum, ENTITY_NORMAL_INWARD); Drum->SetMass(CylMass); Drum->SetMOI(make_float3(IYY, IYY, IZZ)); auto Drum_tracker = DEMSim.Track(Drum); diff --git a/src/kernel/DEMCollisionKernels_SphSph.cuh b/src/kernel/DEMCollisionKernels_SphSph.cuh index e3c47365..0798af5c 100644 --- a/src/kernel/DEMCollisionKernels_SphSph.cuh +++ b/src/kernel/DEMCollisionKernels_SphSph.cuh @@ -130,6 +130,26 @@ __host__ __device__ deme::contact_t checkSphereEntityOverlap(const T1& A, CP = A - to_real3(cntNormal * (radA - overlapDepth / 2.0)); return contactTypePrimitive; } + case (deme::ANAL_OBJ_TYPE_PLANAR_CYL): { + T1 cyl2sph = cylRadialDistanceVec(A, B, dirB); + const T3 dist_delta_r = length(cyl2sph); + if (dist_delta_r <= (T3)DEME_TINY_FLOAT) { + return deme::NOT_A_CONTACT; + } + const T3 dist_plane = normal_sign * ((T3)size1B - dist_delta_r); + if (dist_plane < 0) { + return deme::NOT_A_CONTACT; + } + cntNormal = to_real3(-normal_sign / dist_delta_r * cyl2sph); + overlapDepth = (T3)(radA + beta4Entity) - dist_plane; + if (overlapDepth <= DEME_TINY_FLOAT) { + contactTypePrimitive = deme::NOT_A_CONTACT; + } else { + contactTypePrimitive = deme::SPHERE_ANALYTICAL_CONTACT; + } + CP = A - to_real3(cntNormal * (dist_plane + overlapDepth / 2.0)); + return contactTypePrimitive; + } default: return deme::NOT_A_CONTACT; } diff --git a/src/kernel/DEMCollisionKernels_SphTri_TriTri.cuh 
b/src/kernel/DEMCollisionKernels_SphTri_TriTri.cuh index f9d55412..0861eb99 100644 --- a/src/kernel/DEMCollisionKernels_SphTri_TriTri.cuh +++ b/src/kernel/DEMCollisionKernels_SphTri_TriTri.cuh @@ -118,6 +118,30 @@ bool __device__ tri_plane_penetration(const T1** tri, return in_contact; } +template +inline __host__ __device__ bool planar_cyl_plane_from_ref(const T1& ref, + const T1& entityLoc, + const float3& entityDir, + const float& radius, + const float& normal_sign, + T1& plane_point, + float3& plane_normal) { + T1 radial_vec = cylRadialDistanceVec(ref, entityLoc, entityDir); + const auto dist = length(radial_vec); + if (dist <= (decltype(dist))DEME_TINY_FLOAT) { + return false; + } + const T1 radial_dir = radial_vec / dist; + const float dist_plane = normal_sign * (radius - (float)dist); + if (dist_plane < 0) { + return false; + } + plane_normal = to_real3(-normal_sign * radial_dir); + const T1 axis_point = ref - radial_vec; + plane_point = axis_point + radial_dir * radius; + return true; +} + template bool __device__ tri_cyl_penetration(const T1** tri, const T1& entityLoc, @@ -174,6 +198,22 @@ __host__ __device__ deme::contact_t checkTriEntityOverlap(const T1& A, } return deme::NOT_A_CONTACT; } + case (deme::ANAL_OBJ_TYPE_PLANAR_CYL): { + T1 centroid = (A + B + C) / 3.0; + T1 plane_point; + float3 plane_normal; + if (!planar_cyl_plane_from_ref(centroid, entityLoc, entityDir, entitySize1, normal_sign, plane_point, + plane_normal)) { + return deme::NOT_A_CONTACT; + } + for (const T1*& v : tri) { + double d = planeSignedDistance(*v, plane_point, plane_normal); + double overlapDepth = beta4Entity - d; + if (overlapDepth >= 0.0) + return deme::TRIANGLE_ANALYTICAL_CONTACT; + } + return deme::NOT_A_CONTACT; + } default: return deme::NOT_A_CONTACT; } @@ -215,6 +255,22 @@ __host__ __device__ deme::contact_t checkTriEntityOverlapFP32(const T1& A, } return deme::NOT_A_CONTACT; } + case (deme::ANAL_OBJ_TYPE_PLANAR_CYL): { + T1 centroid = (A + B + C) / 3.0f; + T1 
plane_point; + float3 plane_normal; + if (!planar_cyl_plane_from_ref(centroid, entityLoc, entityDir, entitySize1, normal_sign, plane_point, + plane_normal)) { + return deme::NOT_A_CONTACT; + } + for (const T1*& v : tri) { + const float d = planeSignedDistance(*v, plane_point, plane_normal); + const float overlapDepth = beta4Entity - d; + if (overlapDepth >= 0.0f) + return deme::TRIANGLE_ANALYTICAL_CONTACT; + } + return deme::NOT_A_CONTACT; + } default: return deme::NOT_A_CONTACT; } @@ -253,6 +309,19 @@ bool __device__ calcTriEntityOverlap(const T1& A, in_contact = tri_cyl_penetration(tri, entityLoc, entityDir, entitySize1, entitySize2, normal_sign, contact_normal, overlapDepth, overlapArea, contactPnt); return in_contact; + case deme::ANAL_OBJ_TYPE_PLANAR_CYL: { + T1 centroid = (A + B + C) / 3.0; + T1 plane_point; + float3 plane_normal; + if (!planar_cyl_plane_from_ref(centroid, entityLoc, entityDir, entitySize1, normal_sign, plane_point, + plane_normal)) { + return false; + } + in_contact = + tri_plane_penetration(tri, plane_point, plane_normal, overlapDepth, overlapArea, contactPnt); + contact_normal = plane_normal; + return in_contact; + } default: return false; } From e016668ba483a1759ee575ebd93ab0d0f20b767c Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Tue, 20 Jan 2026 11:56:55 +0100 Subject: [PATCH 07/17] Async writeout no GPU wait speeds up simulation --- src/DEM/API.h | 5 + src/DEM/APIPublic.cpp | 124 +++++++++++++++------ src/DEM/dT.cpp | 207 +++++++++++++++++++++++++++++++---- src/DEM/dT.h | 9 ++ src/core/utils/JitHelper.cpp | 4 - src/core/utils/JitHelper.h | 7 -- 6 files changed, 293 insertions(+), 63 deletions(-) diff --git a/src/DEM/API.h b/src/DEM/API.h index da1b1752..631f3e64 100644 --- a/src/DEM/API.h +++ b/src/DEM/API.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "kT.h" #include "dT.h" @@ -1203,6 +1204,7 @@ class DEMSolver { /// Remove host-side cached vectors (so you can re-define them, and then re-initialize system) 
void ClearCache(); + /// Output methods enqueue asynchronous writes; call WaitForPendingOutput() to block for completion. /// Write the current status of clumps to a file void WriteClumpFile(const std::string& outfilename, unsigned int accuracy = 10) const; void WriteClumpFile(const std::filesystem::path& outfilename, unsigned int accuracy = 10) const { @@ -1231,6 +1233,8 @@ class DEMSolver { /// Write the current status of all meshes to a file. void WriteMeshFile(const std::string& outfilename) const; void WriteMeshFile(const std::filesystem::path& outfilename) const { WriteMeshFile(outfilename.string()); } + /// Wait for any in-flight async output to finish. + void WaitForPendingOutput() const; /// @brief Read 3 columns of your choice from a CSV filem and group them by clump_header. /// @param infilename CSV filename. @@ -1570,6 +1574,7 @@ class DEMSolver { bool m_is_out_owner_wildcards = false; bool m_is_out_cnt_wildcards = false; bool m_is_out_geo_wildcards = false; + mutable std::thread m_output_thread; // User-instructed simulation `world' size. Note it is an approximate of the true size and we will generate a world // not smaller than this. 
This is useful if the user want to automatically add BCs enclosing this user-defined diff --git a/src/DEM/APIPublic.cpp b/src/DEM/APIPublic.cpp index 24b9c5c6..d93358f3 100644 --- a/src/DEM/APIPublic.cpp +++ b/src/DEM/APIPublic.cpp @@ -56,6 +56,7 @@ DEMSolver::DEMSolver(unsigned int nGPUs) { } DEMSolver::~DEMSolver() { + WaitForPendingOutput(); if (sys_initialized) DoDynamicsThenSync(0.0); delete kT; @@ -2028,28 +2029,45 @@ std::shared_ptr DEMSolver::CreateInspector(const std::string& quan } void DEMSolver::WriteSphereFile(const std::string& outfilename) const { + WaitForPendingOutput(); switch (m_out_format) { #ifdef DEME_USE_CHPF case (OUTPUT_FORMAT::CHPF): { - std::ofstream ptFile(outfilename, std::ios::out | std::ios::binary); - dT->writeSpheresAsChpf(ptFile); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + dT->migrateClumpHighOrderInfoToHost(); + m_output_thread = std::thread([this, outfilename]() { + std::ofstream ptFile(outfilename, std::ios::out | std::ios::binary); + dT->writeSpheresAsChpfFromHost(ptFile); + }); break; } #endif case (OUTPUT_FORMAT::CSV): { - std::ofstream ptFile(outfilename, std::ios::out); - dT->writeSpheresAsCsv(ptFile); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + dT->migrateClumpHighOrderInfoToHost(); + dT->migrateOwnerWildcardToHost(); + dT->migrateSphGeoWildcardToHost(); + m_output_thread = std::thread([this, outfilename]() { + std::ofstream ptFile(outfilename, std::ios::out); + dT->writeSpheresAsCsvFromHost(ptFile); + }); break; } case (OUTPUT_FORMAT::BINARY): { // std::ofstream ptFile(outfilename, std::ios::out | std::ios::binary); //// TODO: Implement it - std::ofstream ptFile(outfilename, std::ios::out); DEME_WARNING(std::string("Binary sphere output is not implemented yet, using CSV...")); - dT->writeSpheresAsCsv(ptFile); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + dT->migrateClumpHighOrderInfoToHost(); + 
dT->migrateOwnerWildcardToHost(); + dT->migrateSphGeoWildcardToHost(); + m_output_thread = std::thread([this, outfilename]() { + std::ofstream ptFile(outfilename, std::ios::out); + dT->writeSpheresAsCsvFromHost(ptFile); + }); break; } default: @@ -2058,28 +2076,43 @@ void DEMSolver::WriteSphereFile(const std::string& outfilename) const { } void DEMSolver::WriteClumpFile(const std::string& outfilename, unsigned int accuracy) const { + WaitForPendingOutput(); switch (m_out_format) { #ifdef DEME_USE_CHPF case (OUTPUT_FORMAT::CHPF): { - std::ofstream ptFile(outfilename, std::ios::out | std::ios::binary); - dT->writeClumpsAsChpf(ptFile, accuracy); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + dT->migrateClumpHighOrderInfoToHost(); + m_output_thread = std::thread([this, outfilename, accuracy]() { + std::ofstream ptFile(outfilename, std::ios::out | std::ios::binary); + dT->writeClumpsAsChpfFromHost(ptFile, accuracy); + }); break; } #endif case (OUTPUT_FORMAT::CSV): { - std::ofstream ptFile(outfilename, std::ios::out); - dT->writeClumpsAsCsv(ptFile, accuracy); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + dT->migrateClumpHighOrderInfoToHost(); + dT->migrateOwnerWildcardToHost(); + m_output_thread = std::thread([this, outfilename, accuracy]() { + std::ofstream ptFile(outfilename, std::ios::out); + dT->writeClumpsAsCsvFromHost(ptFile, accuracy); + }); break; } case (OUTPUT_FORMAT::BINARY): { // std::ofstream ptFile(outfilename, std::ios::out | std::ios::binary); //// TODO: Implement it - std::ofstream ptFile(outfilename, std::ios::out); DEME_WARNING(std::string("Binary clump output is not implemented yet, using CSV...")); - dT->writeClumpsAsCsv(ptFile, accuracy); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + dT->migrateClumpHighOrderInfoToHost(); + dT->migrateOwnerWildcardToHost(); + m_output_thread = std::thread([this, outfilename, accuracy]() { + std::ofstream 
ptFile(outfilename, std::ios::out); + dT->writeClumpsAsCsvFromHost(ptFile, accuracy); + }); break; } default: @@ -2088,6 +2121,7 @@ void DEMSolver::WriteClumpFile(const std::string& outfilename, unsigned int accu } void DEMSolver::WriteContactFile(const std::string& outfilename, float force_thres) const { + WaitForPendingOutput(); if (no_recording_contact_forces) { DEME_WARNING(std::string( "The solver is instructed to not record contact force info, so no work is done in a WriteContactFile " @@ -2096,18 +2130,26 @@ void DEMSolver::WriteContactFile(const std::string& outfilename, float force_thr } switch (m_cnt_out_format) { case (OUTPUT_FORMAT::CSV): { - std::ofstream ptFile(outfilename, std::ios::out); - dT->writeContactsAsCsv(ptFile, force_thres); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + dT->migrateContactInfoToHost(); + m_output_thread = std::thread([this, outfilename, force_thres]() { + std::ofstream ptFile(outfilename, std::ios::out); + dT->writeContactsAsCsvFromHost(ptFile, force_thres); + }); break; } case (OUTPUT_FORMAT::BINARY): { // std::ofstream ptFile(outfilename, std::ios::out | std::ios::binary); //// TODO: Implement it DEME_WARNING(std::string("Binary contact pair output is not implemented yet, using CSV...")); - std::ofstream ptFile(outfilename, std::ios::out); - dT->writeContactsAsCsv(ptFile, force_thres); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + dT->migrateContactInfoToHost(); + m_output_thread = std::thread([this, outfilename, force_thres]() { + std::ofstream ptFile(outfilename, std::ios::out); + dT->writeContactsAsCsvFromHost(ptFile, force_thres); + }); break; } default: @@ -2118,23 +2160,33 @@ void DEMSolver::WriteContactFile(const std::string& outfilename, float force_thr } void DEMSolver::WriteMeshFile(const std::string& outfilename) const { + WaitForPendingOutput(); switch (m_mesh_out_format) { case (MESH_FORMAT::VTK): { - std::ofstream ptFile(outfilename, 
std::ios::out); - dT->writeMeshesAsVtk(ptFile); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + m_output_thread = std::thread([this, outfilename]() { + std::ofstream ptFile(outfilename, std::ios::out); + dT->writeMeshesAsVtkFromHost(ptFile); + }); break; } case (MESH_FORMAT::STL): { - std::ofstream ptFile(outfilename, std::ios::out); - dT->writeMeshesAsStl(ptFile); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + m_output_thread = std::thread([this, outfilename]() { + std::ofstream ptFile(outfilename, std::ios::out); + dT->writeMeshesAsStlFromHost(ptFile); + }); break; } case (MESH_FORMAT::PLY): { - std::ofstream ptFile(outfilename, std::ios::out); - dT->writeMeshesAsPly(ptFile); - ptFile.close(); + dT->migrateFamilyToHost(); + dT->migrateClumpPosInfoToHost(); + m_output_thread = std::thread([this, outfilename]() { + std::ofstream ptFile(outfilename, std::ios::out); + dT->writeMeshesAsPlyFromHost(ptFile); + }); break; } default: @@ -2143,6 +2195,12 @@ void DEMSolver::WriteMeshFile(const std::string& outfilename) const { } } +void DEMSolver::WaitForPendingOutput() const { + if (m_output_thread.joinable()) { + m_output_thread.join(); + } +} + size_t DEMSolver::ChangeClumpFamily(unsigned int fam_num, const std::pair& X, const std::pair& Y, diff --git a/src/DEM/dT.cpp b/src/DEM/dT.cpp index 6f2a1622..a72d3197 100644 --- a/src/DEM/dT.cpp +++ b/src/DEM/dT.cpp @@ -1378,11 +1378,15 @@ void DEMDynamicThread::updateClumpMeshArrays(const std::vector posX(simParams->nSpheresGM); @@ -1458,13 +1462,16 @@ void DEMDynamicThread::writeSpheresAsChpf(std::ofstream& ptFile) { #endif void DEMDynamicThread::writeSpheresAsCsv(std::ofstream& ptFile) { - std::ostringstream outstrstream; - migrateFamilyToHost(); migrateClumpPosInfoToHost(); migrateClumpHighOrderInfoToHost(); migrateOwnerWildcardToHost(); migrateSphGeoWildcardToHost(); + writeSpheresAsCsvFromHost(ptFile); +} + +void 
DEMDynamicThread::writeSpheresAsCsvFromHost(std::ofstream& ptFile) { + std::ostringstream outstrstream; outstrstream << OUTPUT_FILE_X_COL_NAME + "," + OUTPUT_FILE_Y_COL_NAME + "," + OUTPUT_FILE_Z_COL_NAME + "," + OUTPUT_FILE_R_COL_NAME; @@ -1611,11 +1618,15 @@ void DEMDynamicThread::writeSpheresAsCsv(std::ofstream& ptFile) { #ifdef DEME_USE_CHPF void DEMDynamicThread::writeClumpsAsChpf(std::ofstream& ptFile, unsigned int accuracy) { - //// TODO: Note using accuracy - chpf::Writer pw; migrateFamilyToHost(); migrateClumpPosInfoToHost(); migrateClumpHighOrderInfoToHost(); + writeClumpsAsChpfFromHost(ptFile, accuracy); +} + +void DEMDynamicThread::writeClumpsAsChpfFromHost(std::ofstream& ptFile, unsigned int accuracy) { + //// TODO: Note using accuracy + chpf::Writer pw; // simParams host version should not be different from device version, so no need to update std::vector posX(simParams->nOwnerBodies); @@ -1695,13 +1706,16 @@ void DEMDynamicThread::writeClumpsAsChpf(std::ofstream& ptFile, unsigned int acc #endif void DEMDynamicThread::writeClumpsAsCsv(std::ofstream& ptFile, unsigned int accuracy) { - std::ostringstream outstrstream; - outstrstream.precision(accuracy); - migrateFamilyToHost(); migrateClumpPosInfoToHost(); migrateClumpHighOrderInfoToHost(); migrateOwnerWildcardToHost(); + writeClumpsAsCsvFromHost(ptFile, accuracy); +} + +void DEMDynamicThread::writeClumpsAsCsvFromHost(std::ofstream& ptFile, unsigned int accuracy) { + std::ostringstream outstrstream; + outstrstream.precision(accuracy); // xyz and quaternion are always there outstrstream << OUTPUT_FILE_X_COL_NAME + "," + OUTPUT_FILE_Y_COL_NAME + "," + OUTPUT_FILE_Z_COL_NAME + @@ -1823,11 +1837,13 @@ void DEMDynamicThread::writeClumpsAsCsv(std::ofstream& ptFile, unsigned int accu } std::shared_ptr DEMDynamicThread::generateContactInfo(float force_thres) { - // Migrate contact info to host migrateFamilyToHost(); migrateClumpPosInfoToHost(); migrateContactInfoToHost(); + return 
generateContactInfoFromHost(force_thres); +} +std::shared_ptr DEMDynamicThread::generateContactInfoFromHost(float force_thres) { size_t total_contacts = *(solverScratchSpace.numContacts); // Wildcards supports only floats now std::vector> existing_wildcards(m_contact_wildcard_names.size()); @@ -2050,9 +2066,122 @@ void DEMDynamicThread::writeContactsAsCsv(std::ofstream& ptFile, float force_thr ptFile << outstrstream.str(); } +void DEMDynamicThread::writeContactsAsCsvFromHost(std::ofstream& ptFile, float force_thres) { + std::ostringstream outstrstream; + + std::shared_ptr contactInfo = generateContactInfoFromHost(force_thres); + + outstrstream << OUTPUT_FILE_CNT_TYPE_NAME; + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::OWNER) { + outstrstream << "," + OUTPUT_FILE_OWNER_1_NAME + "," + OUTPUT_FILE_OWNER_2_NAME; + } + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::GEO_ID) { + outstrstream << "," + OUTPUT_FILE_GEO_ID_1_NAME + "," + OUTPUT_FILE_GEO_ID_2_NAME; + } + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::FORCE) { + outstrstream << "," + OUTPUT_FILE_FORCE_X_NAME + "," + OUTPUT_FILE_FORCE_Y_NAME + "," + + OUTPUT_FILE_FORCE_Z_NAME; + } + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::CNT_POINT) { + outstrstream << "," + OUTPUT_FILE_X_COL_NAME + "," + OUTPUT_FILE_Y_COL_NAME + "," + OUTPUT_FILE_Z_COL_NAME; + } + // if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::COMPONENT) { + // outstrstream << ","+OUTPUT_FILE_COMP_1_NAME+","+OUTPUT_FILE_COMP_2_NAME; + // } + // if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::NICKNAME) { + // outstrstream << ","+OUTPUT_FILE_OWNER_NICKNAME_1_NAME+","+OUTPUT_FILE_OWNER_NICKNAME_2_NAME; + // } + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::NORMAL) { + outstrstream << "," + OUTPUT_FILE_NORMAL_X_NAME + "," + OUTPUT_FILE_NORMAL_Y_NAME + "," + + OUTPUT_FILE_NORMAL_Z_NAME; + } + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::TORQUE) { + outstrstream << "," + OUTPUT_FILE_TORQUE_X_NAME + "," + 
OUTPUT_FILE_TORQUE_Y_NAME + "," + + OUTPUT_FILE_TORQUE_Z_NAME; + } + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::CNT_WILDCARD) { + // Write all wildcard names as header + for (const auto& w_name : m_contact_wildcard_names) { + outstrstream << "," + w_name; + } + } + outstrstream << "\n"; + + for (size_t i = 0; i < contactInfo->Size(); i++) { + outstrstream << contactInfo->Get("ContactType")[i]; + + // (Internal) ownerID and/or geometry ID + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::OWNER) { + outstrstream << "," << contactInfo->Get("AOwner")[i] << "," + << contactInfo->Get("BOwner")[i]; + } + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::GEO_ID) { + outstrstream << "," << contactInfo->Get("AGeo")[i] << "," + << contactInfo->Get("BGeo")[i]; + } + + // Force is already in global... + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::FORCE) { + outstrstream << "," << contactInfo->Get("Force")[i].x << "," + << contactInfo->Get("Force")[i].y << "," << contactInfo->Get("Force")[i].z; + } + + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::CNT_POINT) { + // oriQ is updated already... whereas the contact point is effectively last step's... That's unfortunate. + // Should we do something about it? + outstrstream << "," << contactInfo->Get("Point")[i].x << "," + << contactInfo->Get("Point")[i].y << "," << contactInfo->Get("Point")[i].z; + } + + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::NORMAL) { + outstrstream << "," << contactInfo->Get("Normal")[i].x << "," + << contactInfo->Get("Normal")[i].y << "," << contactInfo->Get("Normal")[i].z; + } + + // Torque is in global already... + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::TORQUE) { + outstrstream << "," << contactInfo->Get("Torque")[i].x << "," + << contactInfo->Get("Torque")[i].y << "," << contactInfo->Get("Torque")[i].z; + } + + // Contact wildcards + if (solverFlags.cntOutFlags & CNT_OUTPUT_CONTENT::CNT_WILDCARD) { + // The order shouldn't be an issue... 
the same set is being processed here and in equip_contact_wildcards, + // see Model.h + for (const auto& name : m_contact_wildcard_names) { + outstrstream << "," << contactInfo->Get(name)[i]; + } + } + + outstrstream << "\n"; + } + + ptFile << outstrstream.str(); +} + void DEMDynamicThread::writeMeshesAsVtk(std::ofstream& ptFile) { - std::ostringstream ostream; migrateFamilyToHost(); + migrateClumpPosInfoToHost(); + writeMeshesAsVtkFromHost(ptFile); +} + +void DEMDynamicThread::writeMeshesAsVtkFromHost(std::ofstream& ptFile) { + std::ostringstream ostream; + + auto ownerPosFromHost = [this](bodyID_t owner) { + double X, Y, Z; + voxelID_t voxel = voxelID[owner]; + subVoxelPos_t subVoxX = locX[owner]; + subVoxelPos_t subVoxY = locY[owner]; + subVoxelPos_t subVoxZ = locZ[owner]; + voxelIDToPosition(X, Y, Z, voxel, subVoxX, subVoxY, subVoxZ, + simParams->nvXp2, simParams->nvYp2, simParams->voxelSize, + simParams->l); + return make_float3(X + simParams->LBFX, Y + simParams->LBFY, Z + simParams->LBFZ); + }; + auto ownerOriQFromHost = [this](bodyID_t owner) { + return make_float4(oriQx[owner], oriQy[owner], oriQz[owner], oriQw[owner]); + }; std::vector vertexOffset(m_meshes.size() + 1, 0); size_t total_f = 0; @@ -2097,8 +2226,8 @@ void DEMDynamicThread::writeMeshesAsVtk(std::ofstream& ptFile) { for (const auto& mmesh : m_meshes) { if (!thisMeshSkip[mesh_num]) { bodyID_t mowner = mmesh->owner; - float3 ownerPos = this->getOwnerPos(mowner)[0]; - float4 ownerOriQ = this->getOwnerOriQ(mowner)[0]; + float3 ownerPos = ownerPosFromHost(mowner); + float4 ownerOriQ = ownerOriQFromHost(mowner); for (const auto& v : mmesh->GetCoordsVertices()) { float3 point = v; applyFrameTransformLocalToGlobal(point, ownerPos, ownerOriQ); @@ -2139,8 +2268,28 @@ void DEMDynamicThread::writeMeshesAsVtk(std::ofstream& ptFile) { } void DEMDynamicThread::writeMeshesAsStl(std::ofstream& ptFile) { - std::ostringstream ostream; migrateFamilyToHost(); + migrateClumpPosInfoToHost(); + 
writeMeshesAsStlFromHost(ptFile); +} + +void DEMDynamicThread::writeMeshesAsStlFromHost(std::ofstream& ptFile) { + std::ostringstream ostream; + + auto ownerPosFromHost = [this](bodyID_t owner) { + double X, Y, Z; + voxelID_t voxel = voxelID[owner]; + subVoxelPos_t subVoxX = locX[owner]; + subVoxelPos_t subVoxY = locY[owner]; + subVoxelPos_t subVoxZ = locZ[owner]; + voxelIDToPosition(X, Y, Z, voxel, subVoxX, subVoxY, subVoxZ, + simParams->nvXp2, simParams->nvYp2, simParams->voxelSize, + simParams->l); + return make_float3(X + simParams->LBFX, Y + simParams->LBFY, Z + simParams->LBFZ); + }; + auto ownerOriQFromHost = [this](bodyID_t owner) { + return make_float4(oriQx[owner], oriQy[owner], oriQz[owner], oriQw[owner]); + }; std::vector thisMeshSkip(m_meshes.size(), 0); unsigned int mesh_num = 0; @@ -2158,8 +2307,8 @@ void DEMDynamicThread::writeMeshesAsStl(std::ofstream& ptFile) { for (const auto& mmesh : m_meshes) { if (!thisMeshSkip[mesh_num]) { bodyID_t mowner = mmesh->owner; - float3 ownerPos = this->getOwnerPos(mowner)[0]; - float4 ownerOriQ = this->getOwnerOriQ(mowner)[0]; + float3 ownerPos = ownerPosFromHost(mowner); + float4 ownerOriQ = ownerOriQFromHost(mowner); const auto& vertices = mmesh->GetCoordsVertices(); const auto& faces = mmesh->GetIndicesVertexes(); @@ -2189,8 +2338,28 @@ void DEMDynamicThread::writeMeshesAsStl(std::ofstream& ptFile) { } void DEMDynamicThread::writeMeshesAsPly(std::ofstream& ptFile) { - std::ostringstream ostream; migrateFamilyToHost(); + migrateClumpPosInfoToHost(); + writeMeshesAsPlyFromHost(ptFile); +} + +void DEMDynamicThread::writeMeshesAsPlyFromHost(std::ofstream& ptFile) { + std::ostringstream ostream; + + auto ownerPosFromHost = [this](bodyID_t owner) { + double X, Y, Z; + voxelID_t voxel = voxelID[owner]; + subVoxelPos_t subVoxX = locX[owner]; + subVoxelPos_t subVoxY = locY[owner]; + subVoxelPos_t subVoxZ = locZ[owner]; + voxelIDToPosition(X, Y, Z, voxel, subVoxX, subVoxY, subVoxZ, + simParams->nvXp2, simParams->nvYp2, 
simParams->voxelSize, + simParams->l); + return make_float3(X + simParams->LBFX, Y + simParams->LBFY, Z + simParams->LBFZ); + }; + auto ownerOriQFromHost = [this](bodyID_t owner) { + return make_float4(oriQx[owner], oriQy[owner], oriQz[owner], oriQw[owner]); + }; std::vector vertexOffset(m_meshes.size() + 1, 0); size_t total_f = 0; @@ -2230,8 +2399,8 @@ void DEMDynamicThread::writeMeshesAsPly(std::ofstream& ptFile) { for (const auto& mmesh : m_meshes) { if (!thisMeshSkip[mesh_num]) { bodyID_t mowner = mmesh->owner; - float3 ownerPos = this->getOwnerPos(mowner)[0]; - float4 ownerOriQ = this->getOwnerOriQ(mowner)[0]; + float3 ownerPos = ownerPosFromHost(mowner); + float4 ownerOriQ = ownerOriQFromHost(mowner); for (const auto& v : mmesh->GetCoordsVertices()) { float3 point = v; applyFrameTransformLocalToGlobal(point, ownerPos, ownerOriQ); diff --git a/src/DEM/dT.h b/src/DEM/dT.h index 96e1df94..a6dd86f8 100644 --- a/src/DEM/dT.h +++ b/src/DEM/dT.h @@ -854,10 +854,13 @@ class DEMDynamicThread { // Generate contact info container based on the current contact array, and return it. 
std::shared_ptr generateContactInfo(float force_thres); + std::shared_ptr generateContactInfoFromHost(float force_thres); #ifdef DEME_USE_CHPF void writeSpheresAsChpf(std::ofstream& ptFile); void writeClumpsAsChpf(std::ofstream& ptFile, unsigned int accuracy = 10); + void writeSpheresAsChpfFromHost(std::ofstream& ptFile); + void writeClumpsAsChpfFromHost(std::ofstream& ptFile, unsigned int accuracy = 10); #endif void writeSpheresAsCsv(std::ofstream& ptFile); void writeClumpsAsCsv(std::ofstream& ptFile, unsigned int accuracy = 10); @@ -865,6 +868,12 @@ class DEMDynamicThread { void writeMeshesAsVtk(std::ofstream& ptFile); void writeMeshesAsStl(std::ofstream& ptFile); void writeMeshesAsPly(std::ofstream& ptFile); + void writeSpheresAsCsvFromHost(std::ofstream& ptFile); + void writeClumpsAsCsvFromHost(std::ofstream& ptFile, unsigned int accuracy = 10); + void writeContactsAsCsvFromHost(std::ofstream& ptFile, float force_thres = DEME_TINY_FLOAT); + void writeMeshesAsVtkFromHost(std::ofstream& ptFile); + void writeMeshesAsStlFromHost(std::ofstream& ptFile); + void writeMeshesAsPlyFromHost(std::ofstream& ptFile); /// Called each time when the user calls DoDynamicsThenSync. 
void startThread(); diff --git a/src/core/utils/JitHelper.cpp b/src/core/utils/JitHelper.cpp index 874b9232..7d68abaa 100644 --- a/src/core/utils/JitHelper.cpp +++ b/src/core/utils/JitHelper.cpp @@ -78,10 +78,6 @@ JitHelper::CachedProgram JitHelper::buildProgram(const std::string& name, for (auto& subst : ordered_subs) { code = std::regex_replace(code, std::regex(subst.first), subst.second); } - - if (std::find(flags.begin(), flags.end(), "-std=c++17") == flags.end()) { - flags.push_back("-std=c++17"); - } { // Collect CUDA include paths from CMake and common fallbacks std::vector include_paths; diff --git a/src/core/utils/JitHelper.h b/src/core/utils/JitHelper.h index d631312d..610c375c 100644 --- a/src/core/utils/JitHelper.h +++ b/src/core/utils/JitHelper.h @@ -47,13 +47,6 @@ class JitHelper { std::unordered_map substitutions = std::unordered_map(), std::vector flags = std::vector()); - //// I'm pretty sure C++17 auto-converts this - // static CachedProgram buildProgram( - // const std::string& name, const std::string& code, - // std::vector
headers = 0, - // std::vector flags = 0 - // ); - static const std::filesystem::path KERNEL_DIR; static const std::filesystem::path KERNEL_INCLUDE_DIR; static const std::filesystem::path CACHE_DIR; From 881278aa1995981773f92b553d29bc877b48af0d Mon Sep 17 00:00:00 2001 From: Ruochun Date: Tue, 27 Jan 2026 01:30:38 +0800 Subject: [PATCH 08/17] (Supposedly) fix fake-remote-contact-induced big penetration problems - Done by requiring tri--tri primitive contacts to respect the general relative direction of the mesh patches involved to have the right in contact normal voting. So this is still done by finding a good voting strategy which I believe is the key to rule out remote fake contacts --- src/DEM/Defines.h | 4 +- src/DEM/dT.cpp | 22 ++----- src/DEM/dT.h | 4 +- src/algorithms/DEMDynamicMisc.cu | 73 ++++----------------- src/algorithms/DEMStaticDeviceSubroutines.h | 13 ---- src/demo/DEMdemo_DrumCubes.cpp | 2 +- src/kernel/DEMCalcForceKernels_Primitive.cu | 28 ++++++-- src/kernel/DEMKinematicMisc.cu | 6 +- 8 files changed, 45 insertions(+), 107 deletions(-) diff --git a/src/DEM/Defines.h b/src/DEM/Defines.h index 8cc61e26..3e300742 100644 --- a/src/DEM/Defines.h +++ b/src/DEM/Defines.h @@ -360,8 +360,8 @@ struct DEMDataDT { float3* contactTorque_convToForce; float3* contactPointGeometryA; float3* contactPointGeometryB; - // Array to record whether a triangle-triangle primitive contact satisfies SAT (is in physical contact) - notStupidBool_t* contactSATSatisfied; + // Array to record whether a triangle-triangle primitive contact is valid (respects patch--patch general direction) + notStupidBool_t* contactPatchDirectionRespected; // float3* contactHistory; // float* contactDuration; diff --git a/src/DEM/dT.cpp b/src/DEM/dT.cpp index f8c16464..47496ce0 100644 --- a/src/DEM/dT.cpp +++ b/src/DEM/dT.cpp @@ -67,7 +67,7 @@ void DEMDynamicThread::packDataPointers() { contactTorque_convToForce.bindDevicePointer(&(granData->contactTorque_convToForce)); 
contactPointGeometryA.bindDevicePointer(&(granData->contactPointGeometryA)); contactPointGeometryB.bindDevicePointer(&(granData->contactPointGeometryB)); - contactSATSatisfied.bindDevicePointer(&(granData->contactSATSatisfied)); + contactPatchDirectionRespected.bindDevicePointer(&(granData->contactPatchDirectionRespected)); // granData->contactHistory = contactHistory.data(); // granData->contactDuration = contactDuration.data(); @@ -576,7 +576,7 @@ void DEMDynamicThread::allocateGPUArrays(size_t nOwnerBodies, DEME_DUAL_ARRAY_RESIZE(idPrimitiveB, cnt_arr_size, 0); DEME_DUAL_ARRAY_RESIZE(contactTypePrimitive, cnt_arr_size, NOT_A_CONTACT); DEME_DUAL_ARRAY_RESIZE(geomToPatchMap, cnt_arr_size, 0); - DEME_DUAL_ARRAY_RESIZE(contactSATSatisfied, cnt_arr_size, 0); + DEME_DUAL_ARRAY_RESIZE(contactPatchDirectionRespected, cnt_arr_size, 0); DEME_DUAL_ARRAY_RESIZE(idPatchA, cnt_arr_size, 0); DEME_DUAL_ARRAY_RESIZE(idPatchB, cnt_arr_size, 0); @@ -2042,7 +2042,7 @@ inline void DEMDynamicThread::contactPrimitivesArraysResize(size_t nContactPairs DEME_DUAL_ARRAY_RESIZE(contactPointGeometryA, nContactPairs, make_float3(0)); DEME_DUAL_ARRAY_RESIZE(contactPointGeometryB, nContactPairs, make_float3(0)); // NEW: Resize SAT satisfaction array for tracking tri-tri physical contact - DEME_DUAL_ARRAY_RESIZE(contactSATSatisfied, nContactPairs, 0); + DEME_DUAL_ARRAY_RESIZE(contactPatchDirectionRespected, nContactPairs, 0); } // Re-packing pointers now is automatic @@ -2466,17 +2466,6 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( startOffsetPrimitive, startOffsetPatch, countPrimitive, streamInfo.stream); solverScratchSpace.finishUsingTempVector("maxPenetrations"); - // Step 8d: Check if each patch has any SAT-satisfying primitive (for tri-tri contacts) - // If no primitive satisfies SAT, the patch contact is non-physical and should use Step 9 fallback - notStupidBool_t* patchHasSAT = nullptr; - if (contact_type == TRIANGLE_TRIANGLE_CONTACT) { - patchHasSAT = 
(notStupidBool_t*)solverScratchSpace.allocateTempVector( - "patchHasSAT", countPatch * sizeof(notStupidBool_t)); - checkPatchHasSATSatisfyingPrimitive(&granData, patchHasSAT, keys, startOffsetPrimitive, - startOffsetPatch, countPrimitive, countPatch, - streamInfo.stream); - } - // Clean up keys arrays now that we're done with reductions solverScratchSpace.finishUsingTempVector("votingKeys"); solverScratchSpace.finishUsingTempVector("uniqueKeys"); @@ -2497,8 +2486,8 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( double3* finalContactPoints = (double3*)solverScratchSpace.allocateTempVector("finalContactPoints", countPatch * sizeof(double3)); finalizePatchResults(totalProjectedAreas, votedNormals, maxProjectedPenetrations, votedContactPoints, - zeroAreaNormals, zeroAreaPenetrations, zeroAreaContactPoints, patchHasSAT, - finalAreas, finalNormals, finalPenetrations.data(), finalContactPoints, countPatch, + zeroAreaNormals, zeroAreaPenetrations, zeroAreaContactPoints, finalAreas, + finalNormals, finalPenetrations.data(), finalContactPoints, countPatch, streamInfo.stream); solverScratchSpace.finishUsingTempVector("totalProjectedAreas"); solverScratchSpace.finishUsingTempVector("votedNormals"); @@ -2507,7 +2496,6 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( solverScratchSpace.finishUsingTempVector("zeroAreaPenetrations"); solverScratchSpace.finishUsingTempVector("votedContactPoints"); solverScratchSpace.finishUsingTempVector("zeroAreaContactPoints"); - solverScratchSpace.finishUsingTempVector("patchHasSAT"); // Now we have: // - finalAreas: final contact area per patch pair (countPatch elements) diff --git a/src/DEM/dT.h b/src/DEM/dT.h index c95908b8..77720d99 100644 --- a/src/DEM/dT.h +++ b/src/DEM/dT.h @@ -236,8 +236,8 @@ class DEMDynamicThread { // Local position of contact point of contact w.r.t. 
the reference frame of body A and B DualArray contactPointGeometryA = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray contactPointGeometryB = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); - // Array to record whether a triangle-triangle primitive contact satisfies SAT (is in physical contact) - DualArray contactSATSatisfied = + // Array to record whether a triangle-triangle primitive contact respects patch--patch general direction + DualArray contactPatchDirectionRespected = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); // Wildcard (extra property) arrays associated with contacts and owners std::vector>> contactWildcards; diff --git a/src/algorithms/DEMDynamicMisc.cu b/src/algorithms/DEMDynamicMisc.cu index 8179fe47..92c9344c 100644 --- a/src/algorithms/DEMDynamicMisc.cu +++ b/src/algorithms/DEMDynamicMisc.cu @@ -138,15 +138,16 @@ __global__ void prepareWeightedNormalsForVoting_impl(DEMDataDT* granData, // Extract the area (double) from contactPointGeometryB (stored as float3) float3 areaStorage = granData->contactPointGeometryB[myContactID]; double area = float3StorageToDouble(areaStorage); - float3 penStorage = granData->contactPointGeometryA[myContactID]; - double penetration = float3StorageToDouble(penStorage); - penetration = (penetration > DEME_TINY_FLOAT) ? 
penetration : DEME_TINY_FLOAT; - double recipPen = 1.0 / penetration; + // But primitive contacts that do not respect the patch general direction have no right in deciding the contact + // normal + notStupidBool_t directionRespected = granData->contactPatchDirectionRespected[myContactID]; + if (!directionRespected) { + area = 0.0; + } // Compute weighted normal (normal * area) // Note that fake contacts do not affect as their area is 0 - weightedNormals[idx] = make_float3((double)normal.x * area * recipPen, (double)normal.y * area * recipPen, - (double)normal.z * area * recipPen); + weightedNormals[idx] = make_float3((double)normal.x * area, (double)normal.y * area, (double)normal.z * area); // Store area for reduction areas[idx] = area; @@ -405,55 +406,7 @@ void findMaxPenetrationPrimitiveForZeroAreaPatches(DEMDataDT* granData, } } -// Kernel to check if any primitive in each patch satisfies SAT (for tri-tri contacts) -// Uses simple idempotent writes to set patchHasSAT[patchIdx] = 1 if any primitive has contactSATSatisfied = 1 -// Since we only transition from 0 to 1, and the array is pre-initialized to 0, multiple threads writing 1 is safe -__global__ void checkPatchHasSATSatisfyingPrimitive_impl(DEMDataDT* granData, - notStupidBool_t* patchHasSAT, - contactPairs_t* keys, - contactPairs_t startOffsetPrimitive, - contactPairs_t startOffsetPatch, - contactPairs_t countPrimitive) { - contactPairs_t idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < countPrimitive) { - contactPairs_t myContactID = startOffsetPrimitive + idx; - contactPairs_t patchIdx = keys[idx]; - contactPairs_t localPatchIdx = patchIdx - startOffsetPatch; - - // Check if this primitive satisfies SAT - notStupidBool_t satisfiesSAT = granData->contactSATSatisfied[myContactID]; - - // If this primitive satisfies SAT, mark the patch as having at least one SAT-satisfying primitive - // Since we only need to set 0 -> 1, a simple write is safe (multiple threads writing 1 is idempotent) - if 
(satisfiesSAT) { - patchHasSAT[localPatchIdx] = 1; - } - } -} - -void checkPatchHasSATSatisfyingPrimitive(DEMDataDT* granData, - notStupidBool_t* patchHasSAT, - contactPairs_t* keys, - contactPairs_t startOffsetPrimitive, - contactPairs_t startOffsetPatch, - contactPairs_t countPrimitive, - contactPairs_t countPatch, - cudaStream_t& this_stream) { - // Initialize patchHasSAT to 0 - DEME_GPU_CALL(cudaMemsetAsync(patchHasSAT, 0, countPatch * sizeof(notStupidBool_t), this_stream)); - - size_t blocks_needed = (countPrimitive + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; - if (blocks_needed > 0) { - checkPatchHasSATSatisfyingPrimitive_impl<<>>( - granData, patchHasSAT, keys, startOffsetPrimitive, startOffsetPatch, countPrimitive); - DEME_GPU_CALL(cudaStreamSynchronize(this_stream)); - } -} - // Kernel to finalize patch results by combining normal voting results with zero-area case handling -// For patches with totalArea > 0 AND patchHasSAT = 1: use voted normal and weighted penetration -// For patches with totalArea == 0 OR patchHasSAT = 0: use max-penetration primitive's normal and penetration (Step 8 -// fallback) __global__ void finalizePatchResults_impl(double* totalProjectedAreas, float3* votedNormals, double* votedPenetrations, @@ -461,7 +414,6 @@ __global__ void finalizePatchResults_impl(double* totalProjectedAreas, float3* zeroAreaNormals, double* zeroAreaPenetrations, double3* zeroAreaContactPoints, - notStupidBool_t* patchHasSAT, double* finalAreas, float3* finalNormals, double* finalPenetrations, @@ -470,18 +422,16 @@ __global__ void finalizePatchResults_impl(double* totalProjectedAreas, contactPairs_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < count) { double projectedArea = totalProjectedAreas[idx]; - // Default to 1 (SAT satisfied) for non-triangle-triangle contacts where patchHasSAT is null - notStupidBool_t hasSAT = (patchHasSAT != nullptr) ? 
patchHasSAT[idx] : 1; - // Use voted results only if projectedArea > 0 AND at least one primitive satisfies SAT - if (projectedArea > 0.0 && hasSAT) { + // Use voted results only if projectedArea > 0 + if (projectedArea > 0.0) { // Normal case: use voted results finalAreas[idx] = projectedArea; finalNormals[idx] = votedNormals[idx]; finalPenetrations[idx] = votedPenetrations[idx]; finalContactPoints[idx] = votedContactPoints[idx]; } else { - // Zero-area case OR no SAT-satisfying primitives: use max-penetration primitive's results (Step 8 fallback) + // Zero-area case: use max-penetration primitive's results (Step 8 fallback) // Set finalArea to 0 for these cases finalAreas[idx] = 0.0; finalNormals[idx] = zeroAreaNormals[idx]; @@ -498,7 +448,6 @@ void finalizePatchResults(double* totalProjectedAreas, float3* zeroAreaNormals, double* zeroAreaPenetrations, double3* zeroAreaContactPoints, - notStupidBool_t* patchHasSAT, double* finalAreas, float3* finalNormals, double* finalPenetrations, @@ -509,7 +458,7 @@ void finalizePatchResults(double* totalProjectedAreas, if (blocks_needed > 0) { finalizePatchResults_impl<<>>( totalProjectedAreas, votedNormals, votedPenetrations, votedContactPoints, zeroAreaNormals, - zeroAreaPenetrations, zeroAreaContactPoints, patchHasSAT, finalAreas, finalNormals, finalPenetrations, + zeroAreaPenetrations, zeroAreaContactPoints, finalAreas, finalNormals, finalPenetrations, finalContactPoints, count); DEME_GPU_CALL(cudaStreamSynchronize(this_stream)); } diff --git a/src/algorithms/DEMStaticDeviceSubroutines.h b/src/algorithms/DEMStaticDeviceSubroutines.h index 747e74c9..edcaf6e5 100644 --- a/src/algorithms/DEMStaticDeviceSubroutines.h +++ b/src/algorithms/DEMStaticDeviceSubroutines.h @@ -222,17 +222,6 @@ void findMaxPenetrationPrimitiveForZeroAreaPatches(DEMDataDT* granData, contactPairs_t countPrimitive, cudaStream_t& this_stream); -// Checks if any primitive in each patch satisfies SAT (for tri-tri contacts) -// Outputs a flag per patch: 1 
if at least one SAT-satisfying primitive exists, 0 otherwise -void checkPatchHasSATSatisfyingPrimitive(DEMDataDT* granData, - notStupidBool_t* patchHasSAT, - contactPairs_t* keys, - contactPairs_t startOffsetPrimitive, - contactPairs_t startOffsetPatch, - contactPairs_t countPrimitive, - contactPairs_t countPatch, - cudaStream_t& this_stream); - // Finalizes patch results by combining normal voting with zero-area case handling void finalizePatchResults(double* totalProjectedAreas, float3* votedNormals, @@ -241,7 +230,6 @@ void finalizePatchResults(double* totalProjectedAreas, float3* zeroAreaNormals, double* zeroAreaPenetrations, double3* zeroAreaContactPoints, - notStupidBool_t* patchHasSAT, double* finalAreas, float3* finalNormals, double* finalPenetrations, @@ -253,7 +241,6 @@ void finalizePatchResults(double* totalProjectedAreas, void finalizePatchContactPoints(double* totalAreas, double3* votedContactPoints, double3* zeroAreaContactPoints, - notStupidBool_t* patchHasSAT, double3* finalContactPoints, contactPairs_t count, cudaStream_t& this_stream); diff --git a/src/demo/DEMdemo_DrumCubes.cpp b/src/demo/DEMdemo_DrumCubes.cpp index 836c4679..55b1842d 100644 --- a/src/demo/DEMdemo_DrumCubes.cpp +++ b/src/demo/DEMdemo_DrumCubes.cpp @@ -113,7 +113,7 @@ int main() { create_directory(out_dir); float time_end = 3.0f; - unsigned int fps = 20; + unsigned int fps = 100; float frame_time = 1.0f / fps; std::cout << "Output at " << fps << " FPS" << std::endl; diff --git a/src/kernel/DEMCalcForceKernels_Primitive.cu b/src/kernel/DEMCalcForceKernels_Primitive.cu index b9c0614e..47d231d7 100644 --- a/src/kernel/DEMCalcForceKernels_Primitive.cu +++ b/src/kernel/DEMCalcForceKernels_Primitive.cu @@ -33,7 +33,10 @@ __device__ __forceinline__ void calculatePrimitiveContactForces_impl(deme::DEMSi double overlapDepth = 0.0; // Area of the contact surface, or in the mesh--mesh case, area of the clipping polygon projection double overlapArea = 0.0; + // `Body pos' in the primitive 
contact kernel means the position of the primitive itself, e.g., sphere center or + // triangle nodes double3 AOwnerPos, bodyAPos, BOwnerPos, bodyBPos; + // Radius always means radius of curvature; for triangle and analytical entity, it's set to a huge number float AOwnerMass, ARadius, BOwnerMass, BRadius; float4 AOriQ, BOriQ; deme::materialsOffset_t bodyAMatType, bodyBMatType; @@ -41,6 +44,9 @@ __device__ __forceinline__ void calculatePrimitiveContactForces_impl(deme::DEMSi float extraMarginSize = 0.; // Triangle A's three points are defined outside, as may be reused in B's acquisition and penetration calc. double3 triANode1, triANode2, triANode3; + // Mesh's patch location may be needed for testing if this primitive contact respects the patch's general spatial + // direction + float3 triPatchPosA; // Then allocate the optional quantities that will be needed in the force model (note: this one can't be in a // curly bracket, obviously...) _forceModelIngredientDefinition_; @@ -97,6 +103,7 @@ __device__ __forceinline__ void calculatePrimitiveContactForces_impl(deme::DEMSi deme::bodyID_t myPatchID = granData->triPatchID[triID]; bodyAMatType = granData->patchMaterialOffset[myPatchID]; extraMarginSize = granData->familyExtraMarginSize[AOwnerFamily]; + float3 relPosPatch = granData->relPosPatch[myPatchID]; triANode1 = to_double3(granData->relPosNode1[triID]); triANode2 = to_double3(granData->relPosNode2[triID]); @@ -123,6 +130,10 @@ __device__ __forceinline__ void calculatePrimitiveContactForces_impl(deme::DEMSi triANode3 += AOwnerPos; // Assign the correct bodyAPos bodyAPos = triangleCentroid(triANode1, triANode2, triANode3); + + // Get triPatchPosA ready + applyOriQToVector3(relPosPatch.x, relPosPatch.y, relPosPatch.z, AOriQ.w, AOriQ.x, AOriQ.y, AOriQ.z); + triPatchPosA = relPosPatch + to_float3(AOwnerPos); } else { // Currently, we only support sphere and mesh for body A ContactType = deme::NOT_A_CONTACT; @@ -185,6 +196,7 @@ __device__ __forceinline__ void 
calculatePrimitiveContactForces_impl(deme::DEMSi // If this is a triangle then it has a patch ID deme::bodyID_t myPatchID = granData->triPatchID[triID]; bodyBMatType = granData->patchMaterialOffset[myPatchID]; + float3 relPosPatch = granData->relPosPatch[myPatchID]; // As the grace margin, the distance (negative overlap) just needs to be within the grace margin. So we pick // the larger of the 2 familyExtraMarginSize. @@ -217,6 +229,9 @@ __device__ __forceinline__ void calculatePrimitiveContactForces_impl(deme::DEMSi triBNode3 += BOwnerPos; // Assign the correct bodyBPos bodyBPos = triangleCentroid(triBNode1, triBNode2, triBNode3); + // Get triPatchPosB ready + applyOriQToVector3(relPosPatch.x, relPosPatch.y, relPosPatch.z, BOriQ.w, BOriQ.x, BOriQ.y, BOriQ.z); + float3 triPatchPosB = relPosPatch + to_float3(BOwnerPos); // If B is a triangle, then A can be a sphere or a triangle. if constexpr (AType == deme::GEO_T_SPHERE) { @@ -246,13 +261,12 @@ __device__ __forceinline__ void calculatePrimitiveContactForces_impl(deme::DEMSi overlapDepth, overlapArea, contactPnt); B2A = to_float3(contact_normal); - // Record whether this tri-tri primitive contact satisfies SAT (is in physical contact) - // Use the dedicated SAT check function to determine if triangles are truly in physical contact - // Note: checkTriangleTriangleOverlap uses projection which can report contact even for non-physical - // "submerged" cases, so we need the actual SAT test for accurate physical contact determination - bool satisfiesSAT = checkTriangleTriangleSAT(triANode1, triANode2, triANode3, triBNode1, - triBNode2, triBNode3); - granData->contactSATSatisfied[myPrimitiveContactID] = satisfiesSAT ? 1 : 0; + // We require that in the tri--tri case, the contact also respects the patch--patch general direction. This + // is because if the contact margin is very large, the algorithm can detect remote fake `submerge' cases + // which involve the triangles of the wrong sides of the mesh particles. 
But in this case, the direction of + // this contact is almost always opposite to the general direction of the 2 patches (in terms of B2A). + float dotProd = dot(B2A, triPatchPosA - triPatchPosB); + granData->contactPatchDirectionRespected[myPrimitiveContactID] = (dotProd > 0.f) ? 1 : 0; // Fix ContactType if needed // If the solver says in contact, we do not question it diff --git a/src/kernel/DEMKinematicMisc.cu b/src/kernel/DEMKinematicMisc.cu index 82c62723..9793906b 100644 --- a/src/kernel/DEMKinematicMisc.cu +++ b/src/kernel/DEMKinematicMisc.cu @@ -89,9 +89,9 @@ __global__ void computeMarginFromAbsv_implTri(deme::DEMSimParams* simParams, double finalMargin = (double)(vel * simParams->expSafetyMulti + simParams->expSafetyAdder) * (*ts) * (*maxDrift) + granData->familyExtraMarginSize[my_family]; - // if (finalMargin < penetrationMargin) { - // finalMargin = penetrationMargin; - // } + if (finalMargin < penetrationMargin) { + finalMargin = penetrationMargin; + } granData->marginSizeTriangle[triID] = finalMargin; } From 0068b68e1fcf7c83fef39e45305e275cfc09c55b Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Mon, 26 Jan 2026 19:55:03 +0100 Subject: [PATCH 09/17] AutoPatchSplit early mock up + colored PLY output --- src/DEM/API.h | 4 + src/DEM/APIPublic.cpp | 6 +- src/DEM/BdrsAndObjs.h | 122 ++- src/DEM/MeshUtils.cpp | 878 ++++++++++++++++++-- src/DEM/dT.cpp | 39 +- src/DEM/dT.h | 4 +- src/demo/ModularTests/DEMTest_MeshPatch.cpp | 123 ++- 7 files changed, 1104 insertions(+), 72 deletions(-) diff --git a/src/DEM/API.h b/src/DEM/API.h index 631f3e64..0a2aa32c 100644 --- a/src/DEM/API.h +++ b/src/DEM/API.h @@ -1475,6 +1475,8 @@ class DEMSolver { /// @brief Specify the output file format of meshes. /// @param format A choice between "VTK", "OBJ", "STL", "PLY". void SetMeshOutputFormat(const std::string& format); + /// @brief Enable/disable per-patch face colors in PLY mesh output (for testing auto patch splitting only). 
+ void EnableMeshPatchColorOutput(bool enable = true); /// @brief Clear stored solver logs (errors, warnings, messages). void ClearLog() { Logger::GetInstance().Clear(); } /// @brief Show error and warnings. @@ -1570,6 +1572,8 @@ class DEMSolver { CNT_OUTPUT_CONTENT::CNT_WILDCARD; // The output file format for meshes MESH_FORMAT m_mesh_out_format = MESH_FORMAT::VTK; + // If PLY mesh output should include per-patch face colors + bool m_mesh_out_ply_patch_colors = false; // If the solver should output wildcards to file bool m_is_out_owner_wildcards = false; bool m_is_out_cnt_wildcards = false; diff --git a/src/DEM/APIPublic.cpp b/src/DEM/APIPublic.cpp index d93358f3..de4d0b8c 100644 --- a/src/DEM/APIPublic.cpp +++ b/src/DEM/APIPublic.cpp @@ -172,6 +172,10 @@ void DEMSolver::SetMeshOutputFormat(const std::string& format) { } } +void DEMSolver::EnableMeshPatchColorOutput(bool enable) { + m_mesh_out_ply_patch_colors = enable; +} + void DEMSolver::SetOutputContent(const std::vector& content) { std::vector u_content(content.size()); for (unsigned int i = 0; i < content.size(); i++) { @@ -2185,7 +2189,7 @@ void DEMSolver::WriteMeshFile(const std::string& outfilename) const { dT->migrateClumpPosInfoToHost(); m_output_thread = std::thread([this, outfilename]() { std::ofstream ptFile(outfilename, std::ios::out); - dT->writeMeshesAsPlyFromHost(ptFile); + dT->writeMeshesAsPlyFromHost(ptFile, m_mesh_out_ply_patch_colors); }); break; } diff --git a/src/DEM/BdrsAndObjs.h b/src/DEM/BdrsAndObjs.h index 3e8b0b34..7bb747ae 100644 --- a/src/DEM/BdrsAndObjs.h +++ b/src/DEM/BdrsAndObjs.h @@ -606,16 +606,6 @@ class DEMMesh : public DEMInitializer { // Whether patch locations have been explicitly set bool patch_locations_explicitly_set = false; - /// @brief Split the mesh into convex patches based on angle threshold. - /// @details Uses a region-growing algorithm to group adjacent triangles whose face normals differ by less than - /// the specified angle threshold. 
Each patch represents a locally convex region of the mesh. Patches are - /// non-overlapping and cover the entire mesh. This is useful for contact force calculations. - /// @param angle_threshold_deg Maximum angle (in degrees) between adjacent face normals to be in same patch. - /// Default is 30.0 degrees. Lower values create more patches (stricter convexity), higher values create fewer - /// patches (relaxed convexity). - /// @return Number of patches created. - unsigned int SplitIntoConvexPatches(float angle_threshold_deg = 30.0f); - /// @brief Manually set the patch IDs for each triangle. /// @details Allows user to manually specify which patch each triangle belongs to. This is useful when /// the user has pre-computed patch information or wants to define patches based on custom criteria. @@ -660,6 +650,118 @@ class DEMMesh : public DEMInitializer { /// patch. /// @return Vector of locations (one per patch). std::vector ComputePatchLocations() const; + // ------------------------------------------------------------ + // Advanced mesh patch splitting + quality reporting + // ------------------------------------------------------------ + enum class PatchQualityLevel : uint8_t { SAFE = 0, WARN = 1, CRITICAL = 2 }; + + enum class PatchConstraintStatus : uint8_t { + SATISFIED = 0, + TOO_MANY_UNMERGEABLE = 1, // patch_max could not be reached because of hard/concave barriers + TOO_FEW_UNSPLITTABLE = 2 // patch_min could not be reached (too little "splittable" structure) + }; + + struct PatchQualityPatch { + PatchQualityLevel level = PatchQualityLevel::SAFE; + + // Normal statistics (area-weighted mean normal, area only for weighting) + float worst_angle_deg = 0.0f; // max deviation from mean normal (largest triangle deviation) + float coherence_r = 1.0f; // ||sum(A*n)|| / sum(A) in [0,1] (1 = perfectly aligned) + + unsigned int n_tris = 0; + + // Internal violations (should be 0 in a "clean" patching) + unsigned int hard_crossings = 0; // internal edges
whose triangle normals exceed hard_angle_deg + unsigned int concave_crossings = 0; // internal concave edges (if concavity enabled and oriented edge is reliable) + unsigned int unoriented_edges = 0; // internal edges where orientation test failed (sign dihedral unreliable) + }; + + struct PatchQualityReport { + PatchQualityLevel overall = PatchQualityLevel::SAFE; + PatchConstraintStatus constraint_status = PatchConstraintStatus::SATISFIED; + + unsigned int achieved_patches = 0; + unsigned int requested_min = 1; + unsigned int requested_max = std::numeric_limits::max(); + + std::vector per_patch; + }; + + struct PatchQualityOptions { + // Coherence thresholds + float safe_r = 0.85f; + float warn_r = 0.65f; + + // Worst-angle tolerance: + // - compare worst_angle_deg to the "reference" (patch_normal_max if enabled, else hard_angle) + float warn_worst_angle_margin_deg = 5.0f; + + bool hard_crossings_are_critical = true; + bool concave_crossings_are_critical = false; + + // If unoriented edges are many, concavity sign is unreliable; treat it at least as WARN if concavity is enabled. + unsigned int unoriented_warn_threshold = 10; + }; + + struct PatchSplitOptions { + // Hysteresis: + // - soft < hard => easy merges below soft, cautious merges in (soft..hard) + // - soft < 0 => disable hysteresis (soft = hard) + float soft_angle_deg = -1.0f; + + // Statistical criterion: + // Max allowed angle between candidate triangle normal and current PATCH mean normal. + // < 0 => disabled (legacy-like behavior). 
+ float patch_normal_max_deg = -1.0f; + + // Concavity filter using signed dihedral angle (reliable for consistently oriented manifold surfaces) + bool block_concave_edges = false; + float concave_allow_deg = 0.0f; // 0 => block any concave edge; allow small negative dihedral if desired + + // Patch count constraints (count-only; no area threshold) + unsigned int patch_min = 1; + unsigned int patch_max = std::numeric_limits::max(); + + // Seeding strategy + bool seed_largest_first = true; + + // Optional auto-tuning (OFF by default) + struct AutoTuneOptions { + bool enabled = false; + + // Stop once overall quality is <= target_level (SAFE is strictest) + PatchQualityLevel target_level = PatchQualityLevel::WARN; + + unsigned int max_iters = 6; + + // Step sizes for tightening/loosening (deg) + float step_deg = 5.0f; + + // Allow enabling concavity block automatically if it helps + bool allow_enable_concavity = true; + } auto_tune; + }; + + /// @brief Smart patch splitter with optional hysteresis, patch-normal statistics, dihedral concavity blocking, + /// patch_min/patch_max enforcement, and optional quality report + auto-tuning. + /// @param hard_angle_deg Mandatory: edges above this are NEVER merged. + /// @param opt Advanced controls. + /// @param out_report Optional: returns SAFE/WARN/CRITICAL feedback + constraint status. + /// @param qopt Classification thresholds for feedback. + /// @return Number of patches created (achieved). 
+ unsigned int SplitIntoConvexPatches(float hard_angle_deg, + const PatchSplitOptions& opt, + PatchQualityReport* out_report, + const PatchQualityOptions& qopt); + unsigned int SplitIntoConvexPatches(float hard_angle_deg) { + return SplitIntoConvexPatches(hard_angle_deg, PatchSplitOptions(), nullptr, PatchQualityOptions()); + } + unsigned int SplitIntoConvexPatches(float hard_angle_deg, const PatchSplitOptions& opt) { + return SplitIntoConvexPatches(hard_angle_deg, opt, nullptr, PatchQualityOptions()); + } + unsigned int SplitIntoConvexPatches(float hard_angle_deg, const PatchSplitOptions& opt, PatchQualityReport* out_report) { + return SplitIntoConvexPatches(hard_angle_deg, opt, out_report, PatchQualityOptions()); + } //////////////////////////////////////////////////////// // Some geo wildcard-related stuff diff --git a/src/DEM/MeshUtils.cpp b/src/DEM/MeshUtils.cpp index f6ded434..80d35e96 100644 --- a/src/DEM/MeshUtils.cpp +++ b/src/DEM/MeshUtils.cpp @@ -673,89 +673,865 @@ static std::vector> buildAdjacencyMap(const std::vector DEME_TINY_FLOAT) + return make_float3(v.x / n, v.y / n, v.z / n); + return make_float3(0, 0, 0); +} +static inline float3 add3(const float3& a, const float3& b) { + return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); +} +static inline float3 mul3(const float3& v, float s) { + return make_float3(v.x * s, v.y * s, v.z * s); +} +static inline float clamp11(float x) { + return std::max(-1.0f, std::min(1.0f, x)); +} +static inline float deg2rad(float deg) { + return deg * (deme::PI / 180.0f); +} +static inline float rad2deg(float rad) { + return rad * (180.0f / deme::PI); +} + +static float computeTriangleArea(const float3& v0, const float3& v1, const float3& v2) { + float3 e1 = make_float3(v1.x - v0.x, v1.y - v0.y, v1.z - v0.z); + float3 e2 = make_float3(v2.x - v0.x, v2.y - v0.y, v2.z - v0.z); + float3 c = cross3(e1, e2); + return 0.5f * norm3(c); +} + +// Signed dihedral angle (deg) around oriented edge va->vb of the current triangle. 
+// Sign is meaningful only when edge orientation is reliable (oriented_ok == true). +static float signedDihedralDeg(const float3& n_cur, const float3& n_nbr, const float3& vA, const float3& vB) { + float3 e = normalize3(make_float3(vB.x - vA.x, vB.y - vA.y, vB.z - vA.z)); + float s = dot3(e, cross3(n_cur, n_nbr)); + float c = clamp11(dot3(n_cur, n_nbr)); + float theta = std::atan2(s, c); // [-pi, pi] + return rad2deg(theta); +} + +// Build triangle adjacency WITH oriented shared-edge info. +// Non-manifold edges (shared by != 2 faces) are treated as boundaries. +static std::vector> buildAdjacencyWithEdgeInfo(const std::vector& face_v_indices) { + struct EdgeRec { + size_t f; + int a; + int b; + }; + + const size_t num_faces = face_v_indices.size(); + std::vector> adj(num_faces); + + std::map, std::vector> edge_map; + + auto add_edge = [&](size_t f, int a, int b) { + int lo = std::min(a, b); + int hi = std::max(a, b); + edge_map[{lo, hi}].push_back(EdgeRec{f, a, b}); + }; + + for (size_t i = 0; i < num_faces; ++i) { + const int3& tri = face_v_indices[i]; + add_edge(i, tri.x, tri.y); + add_edge(i, tri.y, tri.z); + add_edge(i, tri.z, tri.x); + } + + for (const auto& kv : edge_map) { + const auto& recs = kv.second; + if (recs.size() != 2) { + continue; // boundary or non-manifold + } + const EdgeRec& r0 = recs[0]; + const EdgeRec& r1 = recs[1]; + + bool oriented_ok_0 = (r0.a == r1.b && r0.b == r1.a); + bool oriented_ok_1 = oriented_ok_0; + + adj[r0.f].push_back(EdgeAdjInfo{r1.f, r0.a, r0.b, oriented_ok_0}); + adj[r1.f].push_back(EdgeAdjInfo{r0.f, r1.a, r1.b, oriented_ok_1}); + } + + return adj; +} + +// ------------------------------------------------------------ +// Smart patch splitter +// ------------------------------------------------------------ +unsigned int DEMMesh::SplitIntoConvexPatches(float hard_angle_deg, + const PatchSplitOptions& opt_in, + PatchQualityReport* out_report, + const PatchQualityOptions& qopt) { if (nTri == 0) { patches_explicitly_set = 
false; nPatches = 1; + if (out_report) { + out_report->overall = PatchQualityLevel::SAFE; + out_report->constraint_status = PatchConstraintStatus::SATISFIED; + out_report->achieved_patches = 1; + out_report->requested_min = 1; + out_report->requested_max = 1; + out_report->per_patch.clear(); + } return 0; } - // Initialize patch IDs (all -1 means unassigned) - m_patch_ids.clear(); - m_patch_ids.resize(nTri, -1); + if (hard_angle_deg <= 0.0f) { + DEME_ERROR("SplitIntoConvexPatches: hard_angle_deg must be > 0."); + } + if (opt_in.patch_min == 0) { + DEME_ERROR("SplitIntoConvexPatches: patch_min must be >= 1."); + } + if (opt_in.patch_min > opt_in.patch_max) { + DEME_ERROR("SplitIntoConvexPatches: patch_min cannot be > patch_max."); + } + + // Copy options (we may adjust defaults in a controlled way) + PatchSplitOptions opt = opt_in; + + hard_angle_deg = std::min(180.0f, std::max(0.0f, hard_angle_deg)); + + // Resolve hysteresis + float soft_angle_deg = (opt.soft_angle_deg >= 0.0f) ? opt.soft_angle_deg : hard_angle_deg; + soft_angle_deg = std::min(hard_angle_deg, std::max(0.0f, soft_angle_deg)); + + // If user activates hysteresis (soft < hard) but didn't enable patch-normal gating, set a sensible default: + // otherwise the mid-band has no extra decision signal. 
+ bool patch_gate_enabled = (opt.patch_normal_max_deg >= 0.0f); + if (!patch_gate_enabled && soft_angle_deg < hard_angle_deg) { + opt.patch_normal_max_deg = soft_angle_deg; + patch_gate_enabled = true; + } + + float patch_normal_max_deg = opt.patch_normal_max_deg; // may be <0 => disabled + if (patch_gate_enabled) { + patch_normal_max_deg = std::min(180.0f, std::max(0.0f, patch_normal_max_deg)); + } + + const float cos_hard = std::cos(deg2rad(hard_angle_deg)); + const float cos_soft = std::cos(deg2rad(soft_angle_deg)); + float cos_patch = -1.0f; + if (patch_gate_enabled) { + cos_patch = std::cos(deg2rad(patch_normal_max_deg)); + } - // Compute face normals for all triangles + // Precompute face normals and areas std::vector face_normals(nTri); + std::vector face_areas(nTri, 0.0f); for (size_t i = 0; i < nTri; ++i) { - const int3& face = m_face_v_indices[i]; - const float3& v0 = m_vertices[face.x]; - const float3& v1 = m_vertices[face.y]; - const float3& v2 = m_vertices[face.z]; + const int3& f = m_face_v_indices[i]; + const float3& v0 = m_vertices[f.x]; + const float3& v1 = m_vertices[f.y]; + const float3& v2 = m_vertices[f.z]; face_normals[i] = computeFaceNormal(v0, v1, v2); + face_areas[i] = computeTriangleArea(v0, v1, v2); + if (face_areas[i] <= DEME_TINY_FLOAT) + face_areas[i] = 0.0f; + } + + // Adjacency with edge info + auto adjacency = buildAdjacencyWithEdgeInfo(m_face_v_indices); + + // Seed order + std::vector seeds(nTri); + for (size_t i = 0; i < nTri; ++i) + seeds[i] = i; + if (opt.seed_largest_first) { + std::stable_sort(seeds.begin(), seeds.end(), [&](size_t a, size_t b) { return face_areas[a] > face_areas[b]; }); } - // Build adjacency map (which triangles share edges) - std::vector> adjacency = buildAdjacencyMap(m_face_v_indices); + // Core segmentation routine (no post-merge/split) + auto segment_once = [&](const PatchSplitOptions& o, + float soft_deg, + bool patch_gate, + float cosPatch, + std::vector& out_ids, + unsigned int& out_nP) { + 
out_ids.assign(nTri, (patchID_t)-1); + + int current_patch_id = 0; + std::vector queue; + queue.reserve(256); + + for (size_t si = 0; si < nTri; ++si) { + size_t seed = seeds[si]; + if (out_ids[seed] != (patchID_t)-1) + continue; - // Region growing algorithm to assign patches - int current_patch_id = 0; - std::vector queue; + if (current_patch_id > std::numeric_limits::max()) { + DEME_ERROR("SplitIntoPatches: too many patches for patchID_t."); + } - for (size_t seed = 0; seed < nTri; ++seed) { - // Skip if already assigned to a patch - if (m_patch_ids[seed] != -1) { - continue; + float3 sumN = mul3(face_normals[seed], face_areas[seed]); + float sumA = face_areas[seed]; + float3 patchN = normalize3(sumN); + + queue.clear(); + queue.push_back(seed); + out_ids[seed] = (patchID_t)current_patch_id; + + size_t qi = 0; + while (qi < queue.size()) { + size_t cur = queue[qi++]; + + for (const auto& e : adjacency[cur]) { + size_t nb = e.nbr; + if (out_ids[nb] != (patchID_t)-1) + continue; + + const float3& n_cur = face_normals[cur]; + const float3& n_nb = face_normals[nb]; + + // Hard barrier (mandatory) + float d_cn = clamp11(dot3(n_cur, n_nb)); + if (d_cn < cos_hard) + continue; + + // Optional concavity barrier + if (o.block_concave_edges && e.oriented_ok) { + const float3& vA = m_vertices[e.va]; + const float3& vB = m_vertices[e.vb]; + float dih = signedDihedralDeg(n_cur, n_nb, vA, vB); + if (dih < -o.concave_allow_deg) + continue; + } + + // Hysteresis band: + // - if below soft: we still require patch gate if enabled (otherwise accept) + // - if between soft and hard: require patch gate if enabled; otherwise accept (legacy-like) + bool in_soft = (d_cn >= cos_soft); + + if (patch_gate) { + float d_pn = clamp11(dot3(patchN, n_nb)); + if (d_pn < cosPatch) + continue; + // pass patch gate => accept + } else { + // no patch gate => legacy-like behavior (soft only matters if patch gate is active) + (void)in_soft; + } + + out_ids[nb] = (patchID_t)current_patch_id; + 
queue.push_back(nb); + + if (face_areas[nb] > 0.0f) { + sumN = add3(sumN, mul3(n_nb, face_areas[nb])); + sumA += face_areas[nb]; + patchN = normalize3(sumN); + } + } + } + + current_patch_id++; + } + + out_nP = (unsigned int)current_patch_id; + }; + + // A small helper to compress patch IDs to [0..nP-1] + auto compress_ids = [&](std::vector& ids, unsigned int& out_nP) { + auto res = rank_transform(ids); + ids = std::move(res.first); + // recompute nP + patchID_t mx = 0; + for (auto v : ids) + if (v > mx) mx = v; + out_nP = (unsigned int)(mx + 1); + }; + + // Enforce patch_max by merging adjacent patches where allowed (hard/concave respected) + auto enforce_patch_max = [&](std::vector& ids, unsigned int& pcount, PatchConstraintStatus& cstat) { + if (pcount <= opt.patch_max) + return; + + // Build patch mean normals (area-weighted) + std::vector pSumN(pcount, make_float3(0, 0, 0)); + std::vector pSumA(pcount, 0.0f); + + for (size_t t = 0; t < nTri; ++t) { + int p = (int)ids[t]; + if (face_areas[t] > 0.0f) { + pSumN[p] = add3(pSumN[p], mul3(face_normals[t], face_areas[t])); + pSumA[p] += face_areas[t]; + } } - // Start a new patch from this seed triangle - queue.clear(); - queue.push_back(seed); - m_patch_ids[seed] = current_patch_id; + struct DSU { + std::vector parent, rnk; + std::vector* sumN; + std::vector* sumA; + + DSU(int n, std::vector& sN, std::vector& sA) : parent(n), rnk(n, 0), sumN(&sN), sumA(&sA) { + for (int i = 0; i < n; ++i) parent[i] = i; + } + int find(int x) { + while (parent[x] != x) { + parent[x] = parent[parent[x]]; + x = parent[x]; + } + return x; + } + bool unite(int a, int b) { + a = find(a); b = find(b); + if (a == b) return false; + if (rnk[a] < rnk[b]) std::swap(a, b); + parent[b] = a; + if (rnk[a] == rnk[b]) rnk[a]++; + (*sumN)[a] = add3((*sumN)[a], (*sumN)[b]); + (*sumA)[a] += (*sumA)[b]; + return true; + } + float3 patchN(int x) { + x = find(x); + return normalize3((*sumN)[x]); + } + }; + + DSU dsu((int)pcount, pSumN, pSumA); + + struct 
Cand { float cost; int a; int b; }; + struct Cmp { bool operator()(const Cand& x, const Cand& y) const { return x.cost > y.cost; } }; + + auto cost_between = [&](int a, int b) { + float3 na = dsu.patchN(a); + float3 nb = dsu.patchN(b); + float d = clamp11(dot3(na, nb)); + return 1.0f - d; // smaller is better (more parallel) + }; + + // Candidate patch adjacency across mergeable edges (hard + optional concavity) + std::map, float> best_cost; + + for (size_t t = 0; t < nTri; ++t) { + int pt = (int)ids[t]; + for (const auto& e : adjacency[t]) { + size_t nb = e.nbr; + int pn = (int)ids[nb]; + if (pt == pn) + continue; + + float d = clamp11(dot3(face_normals[t], face_normals[nb])); + if (d < cos_hard) + continue; + + if (opt.block_concave_edges && e.oriented_ok) { + const float3& vA = m_vertices[e.va]; + const float3& vB = m_vertices[e.vb]; + float dih = signedDihedralDeg(face_normals[t], face_normals[nb], vA, vB); + if (dih < -opt.concave_allow_deg) + continue; + } + + int a = std::min(pt, pn); + int b = std::max(pt, pn); + float c = cost_between(a, b); + + auto key = std::make_pair(a, b); + auto it = best_cost.find(key); + if (it == best_cost.end() || c < it->second) + best_cost[key] = c; + } + } + + std::priority_queue, Cmp> pq; + for (const auto& kv : best_cost) + pq.push(Cand{kv.second, kv.first.first, kv.first.second}); + + unsigned int cur = pcount; + while (cur > opt.patch_max && !pq.empty()) { + auto c = pq.top(); pq.pop(); + int ra = dsu.find(c.a); + int rb = dsu.find(c.b); + if (ra == rb) + continue; + if (dsu.unite(ra, rb)) + cur--; + } + + // If we couldn't merge enough, mark as unmergeable + if (cur > opt.patch_max) + cstat = PatchConstraintStatus::TOO_MANY_UNMERGEABLE; + + // Write back merged ids and compress + std::unordered_map rep2new; + rep2new.reserve(pcount * 2); + + patchID_t next = 0; + for (size_t i = 0; i < nTri; ++i) { + int p = (int)ids[i]; + int r = dsu.find(p); + auto it = rep2new.find(r); + if (it == rep2new.end()) { + rep2new.emplace(r, 
next); + ids[i] = next; + next++; + } else { + ids[i] = it->second; + } + } + pcount = (unsigned int)next; + }; + + // Enforce patch_min by splitting worst-spread patches (count-only) + auto enforce_patch_min = [&](std::vector& ids, unsigned int& pcount, PatchConstraintStatus& cstat) { + if (pcount >= opt.patch_min) + return; - // Grow the region - size_t queue_idx = 0; - while (queue_idx < queue.size()) { - size_t current = queue[queue_idx++]; + auto rebuild_patch_lists = [&](std::vector>& pTris) { + pTris.assign(pcount, {}); + for (size_t i = 0; i < nTri; ++i) { + int p = (int)ids[i]; + pTris[p].push_back(i); + } + }; + + std::vector> pTris; + rebuild_patch_lists(pTris); + + auto patch_mean_normal = [&](int p) { + float3 sumN = make_float3(0, 0, 0); + float sumA = 0.0f; + for (size_t t : pTris[p]) { + if (face_areas[t] > 0.0f) { + sumN = add3(sumN, mul3(face_normals[t], face_areas[t])); + sumA += face_areas[t]; + } + } + (void)sumA; + return normalize3(sumN); + }; + + auto pick_patch_to_split = [&]() -> int { + float worst = 1.0f; + int worst_p = -1; + for (int p = 0; p < (int)pcount; ++p) { + if (pTris[p].size() < 2) + continue; + float3 pn = patch_mean_normal(p); + float minDot = 1.0f; + for (size_t t : pTris[p]) { + float d = clamp11(dot3(pn, face_normals[t])); + minDot = std::min(minDot, d); + } + if (minDot < worst) { + worst = minDot; + worst_p = p; + } + } + return worst_p; + }; + + struct Node { float cost; size_t tri; int label; }; + struct NodeCmp { bool operator()(const Node& a, const Node& b) const { return a.cost > b.cost; } }; + + std::vector label(nTri, -2); + std::vector touched; touched.reserve(2048); + + while (pcount < opt.patch_min) { + int p = pick_patch_to_split(); + if (p < 0) { + cstat = PatchConstraintStatus::TOO_FEW_UNSPLITTABLE; + break; + } + const auto& tris = pTris[p]; + if (tris.size() < 2) { + cstat = PatchConstraintStatus::TOO_FEW_UNSPLITTABLE; + break; + } + + // choose 2 seeds with farthest normals (2-sweep) + size_t t0 = 
tris[0]; + size_t sA = t0; + float best = 1.0f; + for (size_t t : tris) { + float d = clamp11(dot3(face_normals[t0], face_normals[t])); + if (d < best) { best = d; sA = t; } + } + size_t sB = sA; + best = 1.0f; + for (size_t t : tris) { + float d = clamp11(dot3(face_normals[sA], face_normals[t])); + if (d < best) { best = d; sB = t; } + } + if (sA == sB) { + cstat = PatchConstraintStatus::TOO_FEW_UNSPLITTABLE; + break; + } + + touched.clear(); + for (size_t t : tris) { + label[t] = -1; + touched.push_back(t); + } + + std::priority_queue, NodeCmp> pq; + label[sA] = 0; label[sB] = 1; + pq.push(Node{0.0f, sA, 0}); + pq.push(Node{0.0f, sB, 1}); + + const float3 seedN[2] = {face_normals[sA], face_normals[sB]}; - // Check all adjacent triangles - for (size_t neighbor : adjacency[current]) { - // Skip if already assigned - if (m_patch_ids[neighbor] != -1) { + while (!pq.empty()) { + Node cur = pq.top(); pq.pop(); + size_t t = cur.tri; + int lbl = cur.label; + if (label[t] != lbl) continue; + + for (const auto& e : adjacency[t]) { + size_t nb = e.nbr; + if (label[nb] != -1) + continue; + + float d = clamp11(dot3(face_normals[t], face_normals[nb])); + if (d < cos_hard) + continue; + + float dn = clamp11(dot3(face_normals[nb], seedN[lbl])); + float cost = 1.0f - dn; + + label[nb] = (int8_t)lbl; + pq.push(Node{cost, nb, lbl}); } + } + + size_t c0 = 0, c1 = 0; + for (size_t t : tris) { + if (label[t] == 0) c0++; + else if (label[t] == 1) c1++; + } + if (c0 == 0 || c1 == 0) { + for (size_t t : touched) label[t] = -2; + cstat = PatchConstraintStatus::TOO_FEW_UNSPLITTABLE; + break; + } - // Check angle between normals - float angle = computeAngleBetweenNormals(face_normals[current], face_normals[neighbor]); + patchID_t newP = (patchID_t)pcount; + pcount++; + + for (size_t t : tris) { + ids[t] = (label[t] == 1) ? 
newP : (patchID_t)p; + } - // If angle is below threshold, add to same patch - if (angle <= angle_threshold_deg) { - m_patch_ids[neighbor] = current_patch_id; - queue.push_back(neighbor); + for (size_t t : touched) label[t] = -2; + + // compress & rebuild + compress_ids(ids, pcount); + rebuild_patch_lists(pTris); + } + }; + + // Quality report computation + auto compute_report = [&](const std::vector& ids, + unsigned int pcount, + PatchConstraintStatus cstat, + PatchQualityReport& rep) { + rep.per_patch.assign(pcount, PatchQualityPatch{}); + rep.overall = PatchQualityLevel::SAFE; + rep.constraint_status = cstat; + rep.achieved_patches = pcount; + rep.requested_min = opt.patch_min; + rep.requested_max = opt.patch_max; + + std::vector> pTris(pcount); + for (size_t i = 0; i < nTri; ++i) { + int p = (int)ids[i]; + pTris[p].push_back(i); + } + + std::vector pSumN(pcount, make_float3(0, 0, 0)); + std::vector pSumA(pcount, 0.0f); + + for (int p = 0; p < (int)pcount; ++p) { + for (size_t t : pTris[p]) { + if (face_areas[t] > 0.0f) { + pSumN[p] = add3(pSumN[p], mul3(face_normals[t], face_areas[t])); + pSumA[p] += face_areas[t]; } } } - // Move to next patch - current_patch_id++; + // reference angle for classification + float ref_angle_deg = patch_gate_enabled ? patch_normal_max_deg : hard_angle_deg; + + for (int p = 0; p < (int)pcount; ++p) { + PatchQualityPatch pq; + pq.n_tris = (unsigned int)pTris[p].size(); + + float3 meanN = normalize3(pSumN[p]); + float sumA = pSumA[p]; + float r = (sumA > DEME_TINY_FLOAT) ? 
(norm3(pSumN[p]) / sumA) : 0.0f; + pq.coherence_r = std::min(1.0f, std::max(0.0f, r)); + + float minDot = 1.0f; + for (size_t t : pTris[p]) { + float d = clamp11(dot3(meanN, face_normals[t])); + minDot = std::min(minDot, d); + } + pq.worst_angle_deg = rad2deg(std::acos(clamp11(minDot))); + + unsigned int hard_cross = 0; + unsigned int conc_cross = 0; + unsigned int unoriented = 0; + + for (size_t t : pTris[p]) { + for (const auto& e : adjacency[t]) { + size_t nb = e.nbr; + if ((int)ids[nb] != p) + continue; + + float d = clamp11(dot3(face_normals[t], face_normals[nb])); + if (d < cos_hard) + hard_cross++; + + if (opt.block_concave_edges) { + if (!e.oriented_ok) { + unoriented++; + } else { + const float3& vA = m_vertices[e.va]; + const float3& vB = m_vertices[e.vb]; + float dih = signedDihedralDeg(face_normals[t], face_normals[nb], vA, vB); + if (dih < -opt.concave_allow_deg) + conc_cross++; + } + } + } + } + + pq.hard_crossings = hard_cross / 2; + pq.concave_crossings = conc_cross / 2; + pq.unoriented_edges = unoriented / 2; + + PatchQualityLevel lvl = PatchQualityLevel::SAFE; + + if (qopt.hard_crossings_are_critical && pq.hard_crossings > 0) { + lvl = PatchQualityLevel::CRITICAL; + } + + if (lvl != PatchQualityLevel::CRITICAL) { + bool angle_ok = (pq.worst_angle_deg <= ref_angle_deg); + bool angle_warn = (pq.worst_angle_deg <= ref_angle_deg + qopt.warn_worst_angle_margin_deg); + + if (pq.coherence_r < qopt.warn_r || !angle_warn) { + lvl = PatchQualityLevel::CRITICAL; + } else if (pq.coherence_r < qopt.safe_r || !angle_ok) { + lvl = PatchQualityLevel::WARN; + } + } + + if (opt.block_concave_edges && pq.concave_crossings > 0) { + if (qopt.concave_crossings_are_critical) + lvl = PatchQualityLevel::CRITICAL; + else if (lvl == PatchQualityLevel::SAFE) + lvl = PatchQualityLevel::WARN; + } + + if (opt.block_concave_edges && pq.unoriented_edges >= qopt.unoriented_warn_threshold && lvl == PatchQualityLevel::SAFE) { + lvl = PatchQualityLevel::WARN; + } + + pq.level = lvl; 
+ rep.per_patch[p] = pq; + + if ((int)lvl > (int)rep.overall) + rep.overall = lvl; + } + }; + + // ------------------------------------------------------------ + // Optional auto tuning (OFF unless opt.auto_tune.enabled == true) + // ------------------------------------------------------------ + auto run_full = [&](PatchSplitOptions run_opt, + std::vector& ids_out, + unsigned int& pcount_out, + PatchConstraintStatus& cstat_out, + PatchQualityReport* rep_out) { + cstat_out = PatchConstraintStatus::SATISFIED; + + float run_soft = (run_opt.soft_angle_deg >= 0.0f) ? run_opt.soft_angle_deg : hard_angle_deg; + run_soft = std::min(hard_angle_deg, std::max(0.0f, run_soft)); + + bool run_patch_gate = (run_opt.patch_normal_max_deg >= 0.0f); + if (!run_patch_gate && run_soft < hard_angle_deg) { + run_opt.patch_normal_max_deg = run_soft; + run_patch_gate = true; + } + + float run_cos_patch = -1.0f; + if (run_patch_gate) { + float run_patch_deg = std::min(180.0f, std::max(0.0f, run_opt.patch_normal_max_deg)); + run_cos_patch = std::cos(deg2rad(run_patch_deg)); + } + + // segment + segment_once(run_opt, run_soft, run_patch_gate, run_cos_patch, ids_out, pcount_out); + compress_ids(ids_out, pcount_out); + + // enforce max, then min (count-only) + enforce_patch_max(ids_out, pcount_out, cstat_out); + enforce_patch_min(ids_out, pcount_out, cstat_out); + + // final compress + compress_ids(ids_out, pcount_out); + + if (rep_out) { + PatchQualityReport tmp; + // NOTE: the reference angle used by the report is derived from the outer opt, not from run_opt: + // compute_report() reuses the outer patch_gate_enabled and patch_normal_max_deg for classification. + // For best accuracy, ref_angle could be computed from run_opt as well; this is kept simple here.
+ compute_report(ids_out, pcount_out, cstat_out, tmp); + *rep_out = std::move(tmp); + } + }; + + std::vector best_ids; + unsigned int best_pcount = 0; + PatchConstraintStatus best_cstat = PatchConstraintStatus::SATISFIED; + PatchQualityReport best_rep; + + if (!opt.auto_tune.enabled) { + run_full(opt, best_ids, best_pcount, best_cstat, out_report ? &best_rep : nullptr); + } else { + // Auto-tuning is conservative: it will not run if you hard-fix the count (patch_min == patch_max), + // because then your intention is explicit ("keep the cube a cube"). + if (opt.patch_min == opt.patch_max) { + run_full(opt, best_ids, best_pcount, best_cstat, out_report ? &best_rep : nullptr); + } else { + // Start from user options; search by tightening/loosening patch_normal_max_deg (and soft if present) + PatchSplitOptions cur = opt; + + auto severity_score = [&](PatchQualityLevel lvl) { return (int)lvl; }; + + bool have_best = false; + + for (unsigned int it = 0; it < opt.auto_tune.max_iters; ++it) { + std::vector ids; + unsigned int pc = 0; + PatchConstraintStatus cs = PatchConstraintStatus::SATISFIED; + PatchQualityReport rep; + + run_full(cur, ids, pc, cs, &rep); + + // candidate score: prioritize meeting constraints, then quality, then fewer patches + bool constraints_ok = (cs == PatchConstraintStatus::SATISFIED); + int sev = severity_score(rep.overall); + + auto better_than = [&](bool ok, int s, unsigned int p) { + if (!have_best) return true; + bool best_ok = (best_cstat == PatchConstraintStatus::SATISFIED); + int best_sev = severity_score(best_rep.overall); + if (ok != best_ok) return ok; // prefer satisfied + if (s != best_sev) return s < best_sev; // prefer safer + return p < best_pcount; // prefer fewer patches + }; + + if (better_than(constraints_ok, sev, pc)) { + best_ids = std::move(ids); + best_pcount = pc; + best_cstat = cs; + best_rep = std::move(rep); + have_best = true; + } + + // stop if good enough + if (constraints_ok && (int)best_rep.overall <= 
(int)opt.auto_tune.target_level) + break; + + // Adjust rules: + // - If CRITICAL and we can afford more patches => tighten (smaller patch_normal_max, smaller soft) + // - If too many unmergeable patches => loosen (bigger patch_normal_max, bigger soft, disable concavity if needed) + // - If too few patches => tighten + if (cs == PatchConstraintStatus::TOO_MANY_UNMERGEABLE) { + // loosen + if (cur.patch_normal_max_deg >= 0.0f) + cur.patch_normal_max_deg = std::min(180.0f, cur.patch_normal_max_deg + opt.auto_tune.step_deg); + if (cur.soft_angle_deg >= 0.0f) + cur.soft_angle_deg = std::min(hard_angle_deg, cur.soft_angle_deg + opt.auto_tune.step_deg); + if (cur.block_concave_edges && opt.auto_tune.allow_enable_concavity) { + // concavity block can prevent merging; relax it + cur.block_concave_edges = false; + } + } else if (pc < opt.patch_min || rep.overall == PatchQualityLevel::CRITICAL) { + // tighten if possible + if (cur.patch_normal_max_deg < 0.0f) + cur.patch_normal_max_deg = std::min(hard_angle_deg, 45.0f); // enable with a sane default + else + cur.patch_normal_max_deg = std::max(0.0f, cur.patch_normal_max_deg - opt.auto_tune.step_deg); + + if (cur.soft_angle_deg >= 0.0f) + cur.soft_angle_deg = std::max(0.0f, cur.soft_angle_deg - opt.auto_tune.step_deg); + + if (!cur.block_concave_edges && opt.auto_tune.allow_enable_concavity) { + cur.block_concave_edges = true; + cur.concave_allow_deg = std::max(0.0f, cur.concave_allow_deg); + } + } else if (pc > opt.patch_max) { + // loosen (but note: enforce_patch_max already tries) + if (cur.patch_normal_max_deg >= 0.0f) + cur.patch_normal_max_deg = std::min(180.0f, cur.patch_normal_max_deg + opt.auto_tune.step_deg); + if (cur.soft_angle_deg >= 0.0f) + cur.soft_angle_deg = std::min(hard_angle_deg, cur.soft_angle_deg + opt.auto_tune.step_deg); + } else { + // stable but not good enough; slightly tighten coherence if we have headroom under patch_max + if (pc < opt.patch_max) { + if (cur.patch_normal_max_deg < 0.0f) + 
cur.patch_normal_max_deg = std::min(hard_angle_deg, 45.0f); + else + cur.patch_normal_max_deg = std::max(0.0f, cur.patch_normal_max_deg - opt.auto_tune.step_deg); + } else { + break; + } + } + } + + // If never found, fall back + if (!have_best) { + run_full(opt, best_ids, best_pcount, best_cstat, out_report ? &best_rep : nullptr); + } + } } - nPatches = current_patch_id; + // Commit to mesh state + m_patch_ids = std::move(best_ids); + nPatches = best_pcount; patches_explicitly_set = true; - // If material is set and we cannot broadcast it to all patches, we raise error + // Feedback output + if (out_report) { + *out_report = std::move(best_rep); + } + + // Material broadcasting (same as existing behavior) + if (isMaterialSet && materials.size() == 1) { + materials = std::vector>(nPatches, materials[0]); + } if (isMaterialSet && materials.size() != nPatches) { DEME_ERROR( "The number of materials set (%zu) does not match the number of patches (%u). Please set the " "material for each patch or use a single material for all patches.", materials.size(), nPatches); } - // If material is set and we can broadcast it to all patches, we do so - if (isMaterialSet && materials.size() == 1) { - materials = std::vector>(nPatches, materials[0]); - } return nPatches; } @@ -786,6 +1562,10 @@ void DEMMesh::SetPatchIDs(const std::vector& patch_ids) { patches_explicitly_set = true; + // If material is set and we can broadcast it to all patches, we do so + if (isMaterialSet && materials.size() == 1) { + materials = std::vector>(nPatches, materials[0]); + } // If material is set and we cannot broadcast it to all patches, we raise error if (isMaterialSet && materials.size() != nPatches) { DEME_ERROR( @@ -793,10 +1573,6 @@ void DEMMesh::SetPatchIDs(const std::vector& patch_ids) { "material for each patch or use a single material for all patches.", materials.size(), nPatches); } - // If material is set and we can broadcast it to all patches, we do so - if (isMaterialSet && 
materials.size() == 1) { - materials = std::vector>(nPatches, materials[0]); - } } // Compute patch locations (relative to CoM, which is implicitly at 0,0,0) diff --git a/src/DEM/dT.cpp b/src/DEM/dT.cpp index a72d3197..6232694c 100644 --- a/src/DEM/dT.cpp +++ b/src/DEM/dT.cpp @@ -2337,13 +2337,13 @@ void DEMDynamicThread::writeMeshesAsStlFromHost(std::ofstream& ptFile) { ptFile << ostream.str(); } -void DEMDynamicThread::writeMeshesAsPly(std::ofstream& ptFile) { +void DEMDynamicThread::writeMeshesAsPly(std::ofstream& ptFile, bool patch_colors) { migrateFamilyToHost(); migrateClumpPosInfoToHost(); - writeMeshesAsPlyFromHost(ptFile); + writeMeshesAsPlyFromHost(ptFile, patch_colors); } -void DEMDynamicThread::writeMeshesAsPlyFromHost(std::ofstream& ptFile) { +void DEMDynamicThread::writeMeshesAsPlyFromHost(std::ofstream& ptFile, bool patch_colors) { std::ostringstream ostream; auto ownerPosFromHost = [this](bodyID_t owner) { @@ -2393,6 +2393,11 @@ void DEMDynamicThread::writeMeshesAsPlyFromHost(std::ofstream& ptFile) { ostream << "property float z" << std::endl; ostream << "element face " << total_f << std::endl; ostream << "property list uchar int vertex_indices" << std::endl; + if (patch_colors) { + ostream << "property uchar red" << std::endl; + ostream << "property uchar green" << std::endl; + ostream << "property uchar blue" << std::endl; + } ostream << "end_header" << std::endl; mesh_num = 0; @@ -2411,13 +2416,37 @@ void DEMDynamicThread::writeMeshesAsPlyFromHost(std::ofstream& ptFile) { } ostream << std::endl; + auto hash32 = [](uint32_t x) { + x ^= x >> 16; + x *= 0x7feb352d; + x ^= x >> 15; + x *= 0x846ca68b; + x ^= x >> 16; + return x; + }; + mesh_num = 0; for (const auto& mmesh : m_meshes) { if (!thisMeshSkip[mesh_num]) { - for (const auto& f : mmesh->GetIndicesVertexes()) { + const auto& faces = mmesh->GetIndicesVertexes(); + const auto& patch_ids = mmesh->GetPatchIDs(); + bool has_patch_ids = (patch_ids.size() == faces.size()); + + for (size_t fi = 0; fi 
< faces.size(); ++fi) { + const auto& f = faces[fi]; ostream << "3 " << (size_t)f.x + vertexOffset[mesh_num] << " " << (size_t)f.y + vertexOffset[mesh_num] << " " - << (size_t)f.z + vertexOffset[mesh_num] << std::endl; + << (size_t)f.z + vertexOffset[mesh_num]; + if (patch_colors) { + uint32_t patch_id = has_patch_ids ? static_cast(patch_ids[fi]) : 0u; + uint32_t key = patch_id + 0x9e3779b9u * (mesh_num + 1u); + uint32_t h = hash32(key); + unsigned int r = (h >> 16) & 0xFFu; + unsigned int g = (h >> 8) & 0xFFu; + unsigned int b = h & 0xFFu; + ostream << " " << r << " " << g << " " << b; + } + ostream << std::endl; } } mesh_num++; diff --git a/src/DEM/dT.h b/src/DEM/dT.h index a6dd86f8..cbb4f0c7 100644 --- a/src/DEM/dT.h +++ b/src/DEM/dT.h @@ -867,13 +867,13 @@ class DEMDynamicThread { void writeContactsAsCsv(std::ofstream& ptFile, float force_thres = DEME_TINY_FLOAT); void writeMeshesAsVtk(std::ofstream& ptFile); void writeMeshesAsStl(std::ofstream& ptFile); - void writeMeshesAsPly(std::ofstream& ptFile); + void writeMeshesAsPly(std::ofstream& ptFile, bool patch_colors = false); void writeSpheresAsCsvFromHost(std::ofstream& ptFile); void writeClumpsAsCsvFromHost(std::ofstream& ptFile, unsigned int accuracy = 10); void writeContactsAsCsvFromHost(std::ofstream& ptFile, float force_thres = DEME_TINY_FLOAT); void writeMeshesAsVtkFromHost(std::ofstream& ptFile); void writeMeshesAsStlFromHost(std::ofstream& ptFile); - void writeMeshesAsPlyFromHost(std::ofstream& ptFile); + void writeMeshesAsPlyFromHost(std::ofstream& ptFile, bool patch_colors = false); /// Called each time when the user calls DoDynamicsThenSync. 
void startThread(); diff --git a/src/demo/ModularTests/DEMTest_MeshPatch.cpp b/src/demo/ModularTests/DEMTest_MeshPatch.cpp index cd98a258..06c9b5e2 100644 --- a/src/demo/ModularTests/DEMTest_MeshPatch.cpp +++ b/src/demo/ModularTests/DEMTest_MeshPatch.cpp @@ -18,6 +18,7 @@ #include #include #include +#include using namespace deme; using namespace std::filesystem; @@ -75,6 +76,23 @@ int main() { } } + // Optimized patch settings for convex-focused splitting (prefer single patch) + std::cout << "\n--- Test 2b: Optimized Convex Patch Splitting (Cube) ---" << std::endl; + DEMMesh::PatchSplitOptions opt; + opt.soft_angle_deg = -1.0f; + opt.patch_normal_max_deg = -1.0f; + opt.block_concave_edges = true; + opt.concave_allow_deg = 0.0f; + opt.patch_min = 1; + opt.patch_max = std::numeric_limits::max(); + opt.seed_largest_first = true; + opt.auto_tune.enabled = false; + + DEMMesh::PatchQualityReport rep_cube; + size_t num_patches_opt = cube_mesh->SplitIntoConvexPatches(120.0f, opt, &rep_cube); + std::cout << "Optimized patches: " << num_patches_opt << " (quality " + << static_cast(rep_cube.overall) << ")" << std::endl; + // Test manual patch ID setting std::cout << "\n--- Test 3: Manual Patch ID Setting ---" << std::endl; size_t num_tris = cube_mesh->GetNumTriangles(); @@ -113,9 +131,21 @@ int main() { std::cout << "Number of triangles: " << sphere_mesh->GetNumTriangles() << std::endl; std::cout << "Number of vertices: " << sphere_mesh->GetNumNodes() << std::endl; - // Test with 30 degree threshold - size_t num_patches = sphere_mesh->SplitIntoConvexPatches(30.0f); - std::cout << "Split into " << num_patches << " patches (threshold: 30 degrees)" << std::endl; + // Optimized patch split (prefer single patch) + DEMMesh::PatchSplitOptions opt; + opt.soft_angle_deg = -1.0f; + opt.patch_normal_max_deg = -1.0f; + opt.block_concave_edges = true; + opt.concave_allow_deg = 0.0f; + opt.patch_min = 1; + opt.patch_max = std::numeric_limits::max(); + opt.seed_largest_first = true; + 
opt.auto_tune.enabled = false; + + DEMMesh::PatchQualityReport rep_sphere; + size_t num_patches = sphere_mesh->SplitIntoConvexPatches(120.0f, opt, &rep_sphere); + std::cout << "Split into " << num_patches << " patches (optimized, quality " + << static_cast(rep_sphere.overall) << ")" << std::endl; if (sphere_mesh->ArePatchesExplicitlySet()) { const auto& patch_ids = sphere_mesh->GetPatchIDs(); @@ -146,6 +176,93 @@ int main() { std::cout << "Patches explicitly set: " << (empty_mesh->ArePatchesExplicitlySet() ? "yes" : "no") << " (expected: no)" << std::endl; + // Test concave mesh (drum) + std::cout << "\n--- Test 6: Concave Drum Mesh (STL) ---" << std::endl; + auto drum_mesh = std::make_shared(); + loaded = drum_mesh->LoadSTLMesh((GET_DATA_PATH() / "mesh/drum.stl").string()); + if (loaded) { + std::cout << "Loaded drum mesh successfully" << std::endl; + std::cout << "Number of triangles: " << drum_mesh->GetNumTriangles() << std::endl; + std::cout << "Number of vertices: " << drum_mesh->GetNumNodes() << std::endl; + + DEMMesh::PatchSplitOptions opt; + opt.soft_angle_deg = -1.0f; + opt.patch_normal_max_deg = -1.0f; + opt.block_concave_edges = true; + opt.concave_allow_deg = 0.0f; + opt.patch_min = 1; + opt.patch_max = std::numeric_limits::max(); + opt.seed_largest_first = true; + opt.auto_tune.enabled = false; + + DEMMesh::PatchQualityReport rep_drum; + size_t num_patches = drum_mesh->SplitIntoConvexPatches(120.0f, opt, &rep_drum); + std::cout << "Split into " << num_patches << " patches (concave, quality " + << static_cast(rep_drum.overall) << ")" << std::endl; + } else { + std::cout << "Drum mesh not available, skipping" << std::endl; + } + + // Test PLY export with per-patch colors (debug view) + std::cout << "\n--- Test 7: PLY Export with Patch Colors (per mesh) ---" << std::endl; + { + path out_dir = current_path(); + out_dir /= "DemoOutput_MeshPatch"; + create_directory(out_dir); + + auto export_mesh = [&](const std::string& label, const path& mesh_path, bool 
is_stl) { + DEMSolver DEMSim; + DEMSim.SetVerbosity("INFO"); + DEMSim.SetMeshOutputFormat("PLY"); + DEMSim.EnableMeshPatchColorOutput(true); + DEMSim.InstructBoxDomainDimension(10, 10, 10); + DEMSim.SetMeshUniversalContact(true); + + auto mat_type = DEMSim.LoadMaterial({{"E", 1e9}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.5}}); + + std::shared_ptr mesh_template; + if (is_stl) { + mesh_template = DEMSim.LoadMeshType(mesh_path.string(), mat_type, true, false); + } else { + mesh_template = DEMSim.LoadMeshType(mesh_path.string(), mat_type, true, false); + } + + if (!mesh_template) { + std::cout << "Failed to load mesh template for " << label << std::endl; + return; + } + + DEMMesh::PatchSplitOptions opt; + opt.soft_angle_deg = -1.0f; + opt.patch_normal_max_deg = -1.0f; + opt.block_concave_edges = true; + opt.concave_allow_deg = 0.0f; + opt.patch_min = 1; + opt.patch_max = std::numeric_limits::max(); + opt.seed_largest_first = true; + opt.auto_tune.enabled = false; + + mesh_template->SplitIntoConvexPatches(120.0f, opt); + mesh_template->SetMaterial(mat_type); + + auto mesh_instance = DEMSim.AddMeshFromTemplate(mesh_template, make_float3(0, 0, 0)); + mesh_instance->SetFamily(0); + mesh_instance->SetMass(1000.); + mesh_instance->SetMOI(make_float3(200., 200., 200.)); + + DEMSim.Initialize(); + + path ply_file = out_dir / ("mesh_patch_colors_" + label + ".ply"); + DEMSim.WriteMeshFile(ply_file); + DEMSim.WaitForPendingOutput(); + std::cout << "Wrote patch-colored PLY to: " << ply_file << std::endl; + }; + + export_mesh("cube", GET_DATA_PATH() / "mesh/cube.obj", false); + export_mesh("sphere", GET_DATA_PATH() / "mesh/sphere.obj", false); + export_mesh("drum", GET_DATA_PATH() / "mesh/drum.stl", true); + } + std::cout << "\n========================================" << std::endl; std::cout << "Demo completed successfully!" 
<< std::endl; std::cout << "========================================" << std::endl; From f9b7dab83f8be73040b29cbe7e843def4681cb6e Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Mon, 26 Jan 2026 22:18:43 +0100 Subject: [PATCH 10/17] Simple Collison Test V00 added --- src/demo/CMakeLists.txt | 10 +- src/demo/ModularTests/CMakeLists.txt | 11 +- .../ModularTests/DEMTest_SimpleCollisions.cpp | 294 ++++++++++++++++++ 3 files changed, 307 insertions(+), 8 deletions(-) create mode 100644 src/demo/ModularTests/DEMTest_SimpleCollisions.cpp diff --git a/src/demo/CMakeLists.txt b/src/demo/CMakeLists.txt index 82771448..70f46bbc 100644 --- a/src/demo/CMakeLists.txt +++ b/src/demo/CMakeLists.txt @@ -89,10 +89,12 @@ FOREACH(PROGRAM ${DEMOS}) add_dependencies(${PROGRAM} ${LIBRARIES}) - set_target_properties( - ${PROGRAM} PROPERTIES - CXX_STANDARD ${CXXSTD_SUPPORTED} - ) + if (CXXSTD_SUPPORTED) + set_target_properties( + ${PROGRAM} PROPERTIES + CXX_STANDARD ${CXXSTD_SUPPORTED} + ) + endif() # install(TARGETS ${PROGRAM} DESTINATION ${DEME_INSTALL_DEMO}) diff --git a/src/demo/ModularTests/CMakeLists.txt b/src/demo/ModularTests/CMakeLists.txt index d6083976..8d10e1fd 100644 --- a/src/demo/ModularTests/CMakeLists.txt +++ b/src/demo/ModularTests/CMakeLists.txt @@ -14,6 +14,7 @@ SET(MODULAR_TESTS DEMTest_MeshTemplate DEMTest_PatchLocations DEMTest_MeshPatch + DEMTest_SimpleCollisions ) # ------------------------------------------------------------------------------ @@ -50,9 +51,11 @@ FOREACH(PROGRAM ${MODULAR_TESTS}) add_dependencies(${PROGRAM} ${LIBRARIES}) - set_target_properties( - ${PROGRAM} PROPERTIES - CXX_STANDARD ${CXXSTD_SUPPORTED} - ) + if (CXXSTD_SUPPORTED) + set_target_properties( + ${PROGRAM} PROPERTIES + CXX_STANDARD ${CXXSTD_SUPPORTED} + ) + endif() ENDFOREACH(PROGRAM) diff --git a/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp b/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp new file mode 100644 index 00000000..34d98d64 --- /dev/null +++ 
b/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp @@ -0,0 +1,294 @@ +// Copyright (c) 2021, SBEL GPU Development Team +// Copyright (c) 2021, University of Wisconsin - Madison +// +// SPDX-License-Identifier: BSD-3-Clause + +// ============================================================================= +// Simple collision test: a cube hits an analytical plane with no gravity. +// Cases: +// 1) Edge-first impact (45 deg rotation) +// 2) Corner-first impact (45 deg around X and Y) +// For each case, run with: +// a) Single patch cube +// b) 12-patch cube (one patch per triangle) +// Each scenario is repeated 10 times. We log rebound speed, rebound direction, +// and peak normal force on the plane, plus mean/min/max/std stats. +// ============================================================================= + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +using namespace deme; + +namespace { + +constexpr int kNumRuns = 10; +constexpr double kGap = 0.01; // 10 mm +constexpr double kSpeed = 1.0; // 1 m/s +constexpr double kTimeStep = 1e-5; // seconds +constexpr int kMaxSteps = 200000; // 2 seconds max +constexpr double kContactEps = 1e-6; // contact force threshold + +struct RunResult { + bool ok = false; + double rebound_speed = 0.0; + double peak_normal_force = 0.0; + float3 rebound_dir = make_float3(0, 0, 0); +}; + +struct Stats { + double mean = 0.0; + double min = 0.0; + double max = 0.0; + double stddev = 0.0; +}; + +double vec_length(const float3& v) { + return std::sqrt(v.x * v.x + v.y * v.y + v.z * v.z); +} + +double vec_dot(const float3& a, const float3& b) { + return a.x * b.x + a.y * b.y + a.z * b.z; +} + +float3 vec_scale(const float3& v, double s) { + return make_float3(v.x * s, v.y * s, v.z * s); +} + +Stats calc_stats(const std::vector& values) { + Stats s; + if (values.empty()) { + return s; + } + s.min = values.front(); + s.max = values.front(); + double sum = 0.0; + for (double v : 
values) { + s.min = std::min(s.min, v); + s.max = std::max(s.max, v); + sum += v; + } + s.mean = sum / values.size(); + double var = 0.0; + for (double v : values) { + double d = v - s.mean; + var += d * d; + } + s.stddev = std::sqrt(var / values.size()); + return s; +} + +double compute_min_z_rotated(const std::shared_ptr& mesh, const float4& rotQ) { + double min_z = std::numeric_limits::max(); + for (const auto& v_in : mesh->m_vertices) { + float3 v = v_in; + applyFrameTransformLocalToGlobal(v, make_float3(0, 0, 0), rotQ); + min_z = std::min(min_z, static_cast(v.z)); + } + return min_z; +} + +std::shared_ptr load_cube_template(DEMSolver& DEMSim, + const std::shared_ptr& mat_type, + bool per_triangle_patches) { + auto mesh_template = DEMSim.LoadMeshType((GET_DATA_PATH() / "mesh/cube.obj").string(), mat_type, + true, // load_normals + false); // load_uv + if (!mesh_template) { + return nullptr; + } + + const size_t num_tris = mesh_template->GetNumTriangles(); + std::vector patch_ids(num_tris, 0); + if (per_triangle_patches) { + for (size_t i = 0; i < num_tris; ++i) { + patch_ids[i] = static_cast(i); + } + } + mesh_template->SetPatchIDs(patch_ids); + // Ensure material vector matches patch count after overriding patch IDs. 
+ mesh_template->SetMaterial(mat_type); + return mesh_template; +} + +RunResult run_single_collision(const float4& init_rot, + bool per_triangle_patches, + const std::string& label, + int run_id) { + RunResult result; + + DEMSolver DEMSim; + DEMSim.SetOutputFormat(OUTPUT_FORMAT::CSV); + DEMSim.InstructBoxDomainDimension(5, 5, 5); + DEMSim.SetGravitationalAcceleration(make_float3(0, 0, 0)); + DEMSim.SetCDUpdateFreq(0); + DEMSim.UseAdaptiveUpdateFreq(false); + DEMSim.SetMeshUniversalContact(true); + + auto mat_type = DEMSim.LoadMaterial({{"E", 1e9}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.5}, {"Crr", 0.00}}); + + float3 plane_normal = make_float3(0, 0, 1); + auto plane = DEMSim.AddBCPlane(make_float3(0, 0, 0), plane_normal, mat_type); + auto plane_tracker = DEMSim.Track(plane); + auto mesh_template = load_cube_template(DEMSim, mat_type, per_triangle_patches); + if (!mesh_template) { + std::cout << "[" << label << "] Run " << run_id << ": failed to load cube mesh" << std::endl; + return result; + } + double min_z = compute_min_z_rotated(mesh_template, init_rot); + double init_z = kGap - min_z; + + auto cube = DEMSim.AddMeshFromTemplate(mesh_template, make_float3(0, 0, 0)); + cube->SetFamily(0); + cube->SetMass(1.0); + cube->SetMOI(make_float3(1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0)); + cube->SetInitQuat(init_rot); + cube->SetInitPos(make_float3(0, 0, static_cast(init_z))); + auto cube_tracker = DEMSim.Track(cube); + + DEMSim.SetInitTimeStep(kTimeStep); + DEMSim.Initialize(); + cube_tracker->SetVel(make_float3(0, 0, -static_cast(kSpeed))); + + bool contact_started = false; + bool rebound_captured = false; + double peak_normal_force = 0.0; + + for (int step = 0; step < kMaxSteps; ++step) { + DEMSim.DoStepDynamics(); + + float3 plane_force = plane_tracker->ContactAcc(); + plane_force = vec_scale(plane_force, plane_tracker->Mass()); + double normal_force = std::abs(vec_dot(plane_force, plane_normal)); + peak_normal_force = std::max(peak_normal_force, normal_force); + + if 
(normal_force > kContactEps) { + contact_started = true; + } + + float3 vel = cube_tracker->Vel(); + double vel_n = vec_dot(vel, plane_normal); + + if (contact_started && normal_force <= kContactEps && vel_n > 0.0) { + double speed = vec_length(vel); + float3 dir = make_float3(0, 0, 0); + if (speed > 0) { + dir = vec_scale(vel, 1.0 / speed); + } + result.ok = true; + result.rebound_speed = speed; + result.peak_normal_force = peak_normal_force; + result.rebound_dir = dir; + rebound_captured = true; + break; + } + } + + if (!rebound_captured) { + std::cout << "[" << label << "] Run " << run_id << ": rebound not captured within max steps" << std::endl; + } + + return result; +} + +void print_stats_block(const std::string& label, + const std::vector& results) { + std::vector speeds; + std::vector forces; + std::vector dir_x; + std::vector dir_y; + std::vector dir_z; + + for (const auto& r : results) { + if (!r.ok) { + continue; + } + speeds.push_back(r.rebound_speed); + forces.push_back(r.peak_normal_force); + dir_x.push_back(r.rebound_dir.x); + dir_y.push_back(r.rebound_dir.y); + dir_z.push_back(r.rebound_dir.z); + } + + Stats s_speed = calc_stats(speeds); + Stats s_force = calc_stats(forces); + Stats s_dx = calc_stats(dir_x); + Stats s_dy = calc_stats(dir_y); + Stats s_dz = calc_stats(dir_z); + + std::cout << "\n=== " << label << " stats (population stddev) ===" << std::endl; + std::cout << "Rebound speed [m/s]: mean=" << s_speed.mean << " min=" << s_speed.min << " max=" << s_speed.max + << " std=" << s_speed.stddev << std::endl; + std::cout << "Peak normal force [N]: mean=" << s_force.mean << " min=" << s_force.min << " max=" << s_force.max + << " std=" << s_force.stddev << std::endl; + std::cout << "Rebound dir X: mean=" << s_dx.mean << " min=" << s_dx.min << " max=" << s_dx.max + << " std=" << s_dx.stddev << std::endl; + std::cout << "Rebound dir Y: mean=" << s_dy.mean << " min=" << s_dy.min << " max=" << s_dy.max + << " std=" << s_dy.stddev << std::endl; + 
std::cout << "Rebound dir Z: mean=" << s_dz.mean << " min=" << s_dz.min << " max=" << s_dz.max + << " std=" << s_dz.stddev << std::endl; +} + +float4 edge_quat() { + float4 q = make_float4(0, 0, 0, 1); + q = RotateQuat(q, make_float3(1, 0, 0), static_cast(PI / 4.0)); + return q; +} + +float4 corner_quat() { + float4 q = make_float4(0, 0, 0, 1); + q = RotateQuat(q, make_float3(1, 0, 0), static_cast(PI / 4.0)); + q = RotateQuat(q, make_float3(0, 1, 0), static_cast(PI / 4.0)); + return q; +} + +void run_scenario(const std::string& label, const float4& rot, bool per_triangle_patches) { + std::cout << "\n========================================" << std::endl; + std::cout << label << std::endl; + std::cout << "========================================" << std::endl; + + std::vector results; + results.reserve(kNumRuns); + + for (int i = 0; i < kNumRuns; ++i) { + RunResult r = run_single_collision(rot, per_triangle_patches, label, i); + results.push_back(r); + if (r.ok) { + std::cout << "Run " << i << ": speed=" << r.rebound_speed << " dir=(" << r.rebound_dir.x << ", " + << r.rebound_dir.y << ", " << r.rebound_dir.z << ") force=" << r.peak_normal_force + << std::endl; + } + } + + print_stats_block(label, results); +} + +} // namespace + +int main() { + std::cout << "========================================" << std::endl; + std::cout << "DEM Simple Collisions Test" << std::endl; + std::cout << "========================================" << std::endl; + + float4 q_edge = edge_quat(); + float4 q_corner = corner_quat(); + + run_scenario("Edge impact - single patch", q_edge, false); + run_scenario("Edge impact - 12 patches", q_edge, true); + run_scenario("Corner impact - single patch", q_corner, false); + run_scenario("Corner impact - 12 patches", q_corner, true); + + std::cout << "\n========================================" << std::endl; + std::cout << "Test completed" << std::endl; + std::cout << "========================================" << std::endl; + return 0; +} From 
f9b87fa8640cd29f2d50aa330adec13576ece8a5 Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Tue, 27 Jan 2026 14:20:01 +0100 Subject: [PATCH 11/17] Added auto volume and MOI calc for detailed meshes - Added also a tri path for different tangential stiffness --- src/DEM/BdrsAndObjs.h | 7 +- src/DEM/MeshUtils.cpp | 155 ++++++++++++++++++ src/demo/DEMdemo_ResponseAngleMesh.cpp | 40 +++-- .../FullHertzianForceModel.cu | 8 +- 4 files changed, 187 insertions(+), 23 deletions(-) diff --git a/src/DEM/BdrsAndObjs.h b/src/DEM/BdrsAndObjs.h index 7bb747ae..68b2bfb0 100644 --- a/src/DEM/BdrsAndObjs.h +++ b/src/DEM/BdrsAndObjs.h @@ -441,6 +441,10 @@ class DEMMesh : public DEMInitializer { assertThreeElements(MOI, "SetMOI", "MOI"); SetMOI(make_float3(MOI[0], MOI[1], MOI[2])); } + /// Compute volume, centroid and MOI in CoM frame (unit density). + void ComputeMassProperties(double& volume, float3& center, float3& inertia) const; + /// Check if mesh is watertight (closed, manifold). Returns true if no boundary/non-manifold edges. + bool IsWatertight(size_t* boundary_edges = nullptr, size_t* nonmanifold_edges = nullptr) const; /// Set mesh family number. void SetFamily(unsigned int num) { this->family_code = num; } @@ -459,9 +463,6 @@ class DEMMesh : public DEMInitializer { } /* - /// Compute barycenter, mass and MOI in CoM frame - void ComputeMassProperties(double& mass, float3& center, float3& inertia); - /// Create a map of neighboring triangles, vector of: /// [Ti TieA TieB TieC] /// (the free sides have triangle id = -1). 
diff --git a/src/DEM/MeshUtils.cpp b/src/DEM/MeshUtils.cpp index 80d35e96..f16b958c 100644 --- a/src/DEM/MeshUtils.cpp +++ b/src/DEM/MeshUtils.cpp @@ -185,6 +185,15 @@ bool DEMMesh::LoadSTLMesh(std::string input_file, bool load_normals) { m_face_uv_indices.clear(); set_default_patch_info(); + { + size_t boundary_edges = 0; + size_t nonmanifold_edges = 0; + if (!IsWatertight(&boundary_edges, &nonmanifold_edges)) { + DEME_WARNING( + "Mesh %s is not watertight (boundary edges: %zu, non-manifold edges: %zu). Auto Volume/MOI may be inaccurate.", + filename.c_str(), boundary_edges, nonmanifold_edges); + } + } return true; } @@ -553,6 +562,16 @@ bool DEMMesh::LoadWavefrontMesh(std::string input_file, bool load_normals, bool this->nPatches = 1; this->patches_explicitly_set = false; + { + size_t boundary_edges = 0; + size_t nonmanifold_edges = 0; + if (!IsWatertight(&boundary_edges, &nonmanifold_edges)) { + DEME_WARNING( + "Mesh %s is not watertight (boundary edges: %zu, non-manifold edges: %zu). Volume/MOI may be inaccurate.", + filename.c_str(), boundary_edges, nonmanifold_edges); + } + } + return true; } @@ -1620,4 +1639,140 @@ std::vector DEMMesh::ComputePatchLocations() const { return patch_locations; } +// Compute volume, centroid and MOI in CoM frame (unit density). +// ATTENTION: Only correct for "watertight" meshes with fine and non-degenerated triangles. 
+void DEMMesh::ComputeMassProperties(double& volume, float3& center, float3& inertia) const { + double vol = 0.0; + double mx = 0.0; + double my = 0.0; + double mz = 0.0; + double ix2 = 0.0; + double iy2 = 0.0; + double iz2 = 0.0; + double ixy = 0.0; + double iyz = 0.0; + double izx = 0.0; + + for (const auto& face : m_face_v_indices) { + const float3& a = m_vertices[face.x]; + const float3& b = m_vertices[face.y]; + const float3& c = m_vertices[face.z]; + + const float3 bcross = cross(b, c); + const double v = static_cast(dot(a, bcross)) / 6.0; + + vol += v; + mx += v * (static_cast(a.x) + b.x + c.x) / 4.0; + my += v * (static_cast(a.y) + b.y + c.y) / 4.0; + mz += v * (static_cast(a.z) + b.z + c.z) / 4.0; + + const double ax = a.x, ay = a.y, az = a.z; + const double bx = b.x, by = b.y, bz = b.z; + const double cx = c.x, cy = c.y, cz = c.z; + + const double f1x = ax * ax + bx * bx + cx * cx + ax * bx + bx * cx + cx * ax; + const double f1y = ay * ay + by * by + cy * cy + ay * by + by * cy + cy * ay; + const double f1z = az * az + bz * bz + cz * cz + az * bz + bz * cz + cz * az; + + ix2 += v * f1x / 10.0; + iy2 += v * f1y / 10.0; + iz2 += v * f1z / 10.0; + + const double fxy = 2.0 * (ax * ay + bx * by + cx * cy) + + (ax * by + ay * bx + bx * cy + by * cx + cx * ay + cy * ax); + const double fyz = 2.0 * (ay * az + by * bz + cy * cz) + + (ay * bz + az * by + by * cz + bz * cy + cy * az + cz * ay); + const double fzx = 2.0 * (az * ax + bz * bx + cz * cx) + + (az * bx + ax * bz + bz * cx + bx * cz + cz * ax + cx * az); + + ixy += v * fxy / 20.0; + iyz += v * fyz / 20.0; + izx += v * fzx / 20.0; + } + + if (vol == 0.0) { + volume = 0.0; + center = make_float3(0, 0, 0); + inertia = make_float3(0, 0, 0); + return; + } + + if (vol < 0.0) { + vol = -vol; + mx = -mx; + my = -my; + mz = -mz; + ix2 = -ix2; + iy2 = -iy2; + iz2 = -iz2; + ixy = -ixy; + iyz = -iyz; + izx = -izx; + } + + const double cx = mx / vol; + const double cy = my / vol; + const double cz = mz / vol; + + 
double Ixx = iy2 + iz2; + double Iyy = ix2 + iz2; + double Izz = ix2 + iy2; + double Ixy = -ixy; + double Iyz = -iyz; + double Izx = -izx; + + // Shift to center of mass. + Ixx -= vol * (cy * cy + cz * cz); + Iyy -= vol * (cx * cx + cz * cz); + Izz -= vol * (cx * cx + cy * cy); + Ixy += vol * cx * cy; + Iyz += vol * cy * cz; + Izx += vol * cz * cx; + + volume = vol; + center = make_float3(static_cast(cx), static_cast(cy), static_cast(cz)); + inertia = make_float3(static_cast(Ixx), static_cast(Iyy), static_cast(Izz)); +} + +bool DEMMesh::IsWatertight(size_t* boundary_edges, size_t* nonmanifold_edges) const { + if (boundary_edges) { + *boundary_edges = 0; + } + if (nonmanifold_edges) { + *nonmanifold_edges = 0; + } + if (m_face_v_indices.empty()) { + return true; + } + + std::map, size_t> edge_counts; + for (const auto& face : m_face_v_indices) { + std::pair edges[3] = {{std::min(face.x, face.y), std::max(face.x, face.y)}, + {std::min(face.y, face.z), std::max(face.y, face.z)}, + {std::min(face.z, face.x), std::max(face.z, face.x)}}; + for (int e = 0; e < 3; ++e) { + edge_counts[edges[e]]++; + } + } + + size_t boundary = 0; + size_t nonmanifold = 0; + for (const auto& kv : edge_counts) { + if (kv.second == 1) { + boundary++; + } else if (kv.second > 2) { + nonmanifold++; + } + } + + if (boundary_edges) { + *boundary_edges = boundary; + } + if (nonmanifold_edges) { + *nonmanifold_edges = nonmanifold; + } + + return boundary == 0 && nonmanifold == 0; +} + } // end namespace deme diff --git a/src/demo/DEMdemo_ResponseAngleMesh.cpp b/src/demo/DEMdemo_ResponseAngleMesh.cpp index cabefbe7..112e5a64 100644 --- a/src/demo/DEMdemo_ResponseAngleMesh.cpp +++ b/src/demo/DEMdemo_ResponseAngleMesh.cpp @@ -57,14 +57,6 @@ std::shared_ptr LoadStlMesh(DEMSolver& sim, return sim.AddMesh(mesh); } -float3 ComputeBoxMOI(const float3& dims, float mass) { - // MOI of a box about its center: Ixx = 1/12 m (b^2 + c^2), etc. 
- float ix = mass / 12.f * (dims.y * dims.y + dims.z * dims.z); - float iy = mass / 12.f * (dims.x * dims.x + dims.z * dims.z); - float iz = mass / 12.f * (dims.x * dims.x + dims.y * dims.y); - return make_float3(ix, iy, iz); -} - std::pair ComputeBounds(const std::vector& vertices) { float3 vmin = make_float3(std::numeric_limits::max()); float3 vmax = make_float3(std::numeric_limits::lowest()); @@ -107,9 +99,17 @@ int main() { const float tri_diag = std::sqrt(tri_dims.x * tri_dims.x + tri_dims.y * tri_dims.y + tri_dims.z * tri_dims.z); const float tri_radius = 0.5f * tri_diag; const float particle_density = 2600.0f; - const float particle_volume = tri_dims.x * tri_dims.y * tri_dims.z; - const float particle_mass = particle_density * particle_volume; - const float3 particle_moi = ComputeBoxMOI(tri_dims, particle_mass); + double tri_volume = 0.0; + float3 tri_center = make_float3(0, 0, 0); + float3 tri_inertia = make_float3(0, 0, 0); + tri_template->ComputeMassProperties(tri_volume, tri_center, tri_inertia); + const float particle_mass = static_cast(tri_volume * particle_density); + const float3 particle_moi = tri_inertia * particle_density; + std::cout << "Particle STL volume (m^3): " << tri_volume << std::endl; + std::cout << "Particle STL MOI (unit density, CoM): " << tri_inertia.x << ", " << tri_inertia.y << ", " + << tri_inertia.z << std::endl; + const double cube_vol = std::pow(4.0e-3, 3); + std::cout << "Particle mass (kg): " << particle_mass << std::endl; // Load drum mantle from STL; STL units are mm with z in [0, 100] path drum_path = GET_DATA_PATH() / "mesh" / "drum.stl"; @@ -118,14 +118,18 @@ int main() { const float drum_height = drum_max.z - drum_min.z; unsigned int drum_family = 100; drum_mesh->SetFamily(drum_family); - const float drum_mass = 5.0f; + const float drum_density = 2600.0f; + double drum_volume = 0.0; + float3 drum_center = make_float3(0, 0, 0); + float3 drum_inertia = make_float3(0, 0, 0); + drum_mesh->ComputeMassProperties(drum_volume, 
drum_center, drum_inertia); + const float drum_mass = static_cast(drum_volume * drum_density); drum_mesh->SetMass(drum_mass); - const float drum_outer_radius = - std::max(std::max(std::abs(drum_min.x), std::abs(drum_max.x)), - std::max(std::abs(drum_min.y), std::abs(drum_max.y))); - float izz = 0.5f * drum_mass * drum_outer_radius * drum_outer_radius; - float ixx = (drum_mass / 12.0f) * (3 * drum_outer_radius * drum_outer_radius + drum_height * drum_height); - drum_mesh->SetMOI(make_float3(ixx, ixx, izz)); + drum_mesh->SetMOI(drum_inertia * drum_density); + std::cout << "Drum STL volume (m^3): " << drum_volume << std::endl; + std::cout << "Drum STL MOI (unit density, CoM): " << drum_inertia.x << ", " << drum_inertia.y << ", " + << drum_inertia.z << std::endl; + std::cout << "Drum mass (kg): " << drum_mass << std::endl; DEMSim.SetFamilyPrescribedAngVel(drum_family, "0", "0", to_string_with_precision(drum_ang_vel)); // Add top and bottom planes at z = 0 and z = 0.1 m. They rotate with the drum family (axis-aligned so rotation diff --git a/src/kernel/DEMCustomizablePolicies/FullHertzianForceModel.cu b/src/kernel/DEMCustomizablePolicies/FullHertzianForceModel.cu index e5b6d89e..ccd94b27 100644 --- a/src/kernel/DEMCustomizablePolicies/FullHertzianForceModel.cu +++ b/src/kernel/DEMCustomizablePolicies/FullHertzianForceModel.cu @@ -127,9 +127,13 @@ if (overlapDepth > 0) { // Tangential force part if (mu_cnt > 0.f) { + float gt; const float kt = 8.f * G_cnt * contact_radius; - const float gt = - -deme::TWO_TIMES_SQRT_FIVE_OVER_SIX * beta * sqrtf(mass_eff * kt); // do we neen higher damping?? 
+ if (tri_involved) { + gt = -deme::TWO_TIMES_SQRT_FIVE_OVER_THREE * beta * sqrtf(mass_eff * kt); + } else { + gt = -deme::TWO_TIMES_SQRT_FIVE_OVER_SIX * beta * sqrtf(mass_eff * kt); + } float3 tangent_force = -kt * delta_tan - gt * vrel_tan; const float ft = length(tangent_force); if (ft > DEME_TINY_FLOAT) { From 6745df9111997beeaee4d2659817113e7f81a870 Mon Sep 17 00:00:00 2001 From: Ruochun Date: Wed, 28 Jan 2026 00:27:26 +0800 Subject: [PATCH 12/17] Clarify that the patch loc metric works for MM contact only --- src/DEM/dT.cpp | 2 +- src/algorithms/DEMDynamicMisc.cu | 16 +- src/algorithms/DEMStaticDeviceSubroutines.h | 1 + src/demo/DEMdemo_MeshFalling.cpp | 153 ++++++++++---------- 4 files changed, 90 insertions(+), 82 deletions(-) diff --git a/src/DEM/dT.cpp b/src/DEM/dT.cpp index 47496ce0..312a9d09 100644 --- a/src/DEM/dT.cpp +++ b/src/DEM/dT.cpp @@ -2343,7 +2343,7 @@ inline void DEMDynamicThread::dispatchPatchBasedForceCorrections( // Step 1: Prepare weighted normals, areas, and keys // The kernel extracts keys from geomToPatchMap, computes weighted normals, and stores areas prepareWeightedNormalsForVoting(&granData, weightedNormals, areas, keys, startOffsetPrimitive, - countPrimitive, streamInfo.stream); + countPrimitive, contact_type, streamInfo.stream); // Step 2: Reduce-by-key for weighted normals (sum) // The keys are geomToPatchMap values (contactPairs_t), which group primitives by patch pair diff --git a/src/algorithms/DEMDynamicMisc.cu b/src/algorithms/DEMDynamicMisc.cu index 92c9344c..d56a782a 100644 --- a/src/algorithms/DEMDynamicMisc.cu +++ b/src/algorithms/DEMDynamicMisc.cu @@ -127,7 +127,8 @@ __global__ void prepareWeightedNormalsForVoting_impl(DEMDataDT* granData, double* areas, contactPairs_t* keys, contactPairs_t startOffset, - contactPairs_t count) { + contactPairs_t count, + contact_t contactType) { contactPairs_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < count) { contactPairs_t myContactID = startOffset + idx; @@ -139,10 
+140,12 @@ __global__ void prepareWeightedNormalsForVoting_impl(DEMDataDT* granData, float3 areaStorage = granData->contactPointGeometryB[myContactID]; double area = float3StorageToDouble(areaStorage); // But primitive contacts that do not respect the patch general direction have no right in deciding the contact - // normal - notStupidBool_t directionRespected = granData->contactPatchDirectionRespected[myContactID]; - if (!directionRespected) { - area = 0.0; + // normal (in mesh--mesh contact) + if (contactType == TRIANGLE_TRIANGLE_CONTACT) { + notStupidBool_t directionRespected = granData->contactPatchDirectionRespected[myContactID]; + if (!directionRespected) { + area = 0.0; + } } // Compute weighted normal (normal * area) @@ -163,11 +166,12 @@ void prepareWeightedNormalsForVoting(DEMDataDT* granData, contactPairs_t* keys, contactPairs_t startOffset, contactPairs_t count, + contact_t contactType, cudaStream_t& this_stream) { size_t blocks_needed = (count + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; if (blocks_needed > 0) { prepareWeightedNormalsForVoting_impl<<>>( - granData, weightedNormals, areas, keys, startOffset, count); + granData, weightedNormals, areas, keys, startOffset, count, contactType); DEME_GPU_CALL(cudaStreamSynchronize(this_stream)); } } diff --git a/src/algorithms/DEMStaticDeviceSubroutines.h b/src/algorithms/DEMStaticDeviceSubroutines.h index edcaf6e5..c0cd93c2 100644 --- a/src/algorithms/DEMStaticDeviceSubroutines.h +++ b/src/algorithms/DEMStaticDeviceSubroutines.h @@ -179,6 +179,7 @@ void prepareWeightedNormalsForVoting(DEMDataDT* granData, contactPairs_t* keys, contactPairs_t startOffset, contactPairs_t count, + contact_t contactType, cudaStream_t& this_stream); // Normalizes voted normals by total area and scatters to output diff --git a/src/demo/DEMdemo_MeshFalling.cpp b/src/demo/DEMdemo_MeshFalling.cpp index 65a94682..26bdc0ae 100644 --- a/src/demo/DEMdemo_MeshFalling.cpp +++ b/src/demo/DEMdemo_MeshFalling.cpp @@ -35,8 
+35,8 @@ int main() { DEMSim.SetMeshUniversalContact(true); // Define material properties - auto mat_box = DEMSim.LoadMaterial({{"E", 1e9}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.4}, {"Crr", 0.1}}); - auto mat_plane = DEMSim.LoadMaterial({{"E", 1e9}, {"nu", 0.3}, {"CoR", 0.5}, {"mu", 0.3}, {"Crr", 0.1}}); + auto mat_box = DEMSim.LoadMaterial({{"E", 1e9}, {"nu", 0.3}, {"CoR", 0.4}, {"mu", 0.4}, {"Crr", 0.1}}); + auto mat_plane = DEMSim.LoadMaterial({{"E", 1e9}, {"nu", 0.3}, {"CoR", 0.4}, {"mu", 0.3}, {"Crr", 0.1}}); // Add a bottom plane at z = 0 DEMSim.AddBCPlane(make_float3(0, 0, 0), make_float3(0, 0, 1), mat_plane); @@ -45,7 +45,7 @@ int main() { const int num_particles_x = 6; const int num_particles_y = 6; const float particle_spacing = 2.0; - const float initial_height = 8.0; + const float initial_height = 2.0; const float base_size = 0.5; // Base scale for all meshes const float cylinder_scale_factor = 0.5; // Cylinders scaled down since they're taller @@ -61,82 +61,85 @@ int main() { for (int i = 0; i < num_particles_x; i++) { for (int j = 0; j < num_particles_y; j++) { - float x = (i - num_particles_x / 2.0 + 0.5) * particle_spacing + pos_dist(gen); - float y = (j - num_particles_y / 2.0 + 0.5) * particle_spacing + pos_dist(gen); - float z = initial_height + (i + j) * 0.5 + pos_dist(gen) * 2; - - // Select mesh type randomly: 0=cube, 1=sphere, 2=cone, 3=cylinder - int mesh_type = mesh_type_dist(gen); - std::shared_ptr particle; - - if (mesh_type == 0) { - // Cube with non-uniform scaling - particle = DEMSim.AddWavefrontMeshObject((GET_DATA_PATH() / "mesh/cube.obj").string(), mat_box); - float scale_x = base_size * scale_dist(gen); - float scale_y = base_size * scale_dist(gen); - float scale_z = base_size * scale_dist(gen); - particle->Scale(make_float3(scale_x, scale_y, scale_z)); // Non-uniform scaling - - // Set mass and MOI for the box (approximate as uniform density) - float mass = 1000.0 * scale_x * scale_y * scale_z; - float moi_x = mass * (scale_y * 
scale_y + scale_z * scale_z) / 12.0; - float moi_y = mass * (scale_x * scale_x + scale_z * scale_z) / 12.0; - float moi_z = mass * (scale_x * scale_x + scale_y * scale_y) / 12.0; - particle->SetMass(mass); - particle->SetMOI(make_float3(moi_x, moi_y, moi_z)); - } else if (mesh_type == 1) { - // Sphere (unit sphere in mesh) - particle = DEMSim.AddWavefrontMeshObject((GET_DATA_PATH() / "mesh/sphere.obj").string(), mat_box); - float scale = base_size * scale_dist(gen); - particle->Scale(scale); - - // Set mass and MOI for sphere - float mass = 1000.0 * (4.0 / 3.0) * math_PI * scale * scale * scale; - float moi = 0.4 * mass * scale * scale; // MOI for sphere - particle->SetMass(mass); - particle->SetMOI(make_float3(moi, moi, moi)); - } else if (mesh_type == 2) { - // Cone (height ~1, radius ~1 in mesh) - particle = DEMSim.AddWavefrontMeshObject((GET_DATA_PATH() / "mesh/cone.obj").string(), mat_box); - // Unit cone's CoM is at this location... - particle->InformCentroidPrincipal(make_float3(0, 0, 3. 
/ 4.), make_float4(0, 0, 0, 1)); - float scale = base_size * scale_dist(gen); - particle->Scale(scale); - - // Set mass and MOI for cone (approximate) - float mass = 1000.0 * (1.0 / 3.0) * math_PI * scale * scale * scale; - float moi_base = 0.3 * mass * scale * scale; // Approximate MOI - float moi_height = 0.15 * mass * scale * scale; - particle->SetMass(mass); - particle->SetMOI(make_float3(moi_base, moi_base, moi_height)); - } else { - // Cylinder (radius ~1, height ~2 in mesh) - particle = DEMSim.AddWavefrontMeshObject((GET_DATA_PATH() / "mesh/cyl_r1_h2.obj").string(), mat_box); - float scale = base_size * cylinder_scale_factor * scale_dist(gen); - particle->Scale(scale); - - // Set mass and MOI for cylinder - float radius = scale; - float height = 2.0 * scale; - float mass = 1000.0 * math_PI * radius * radius * height; - float moi_radial = mass * (3.0 * radius * radius + height * height) / 12.0; - float moi_axial = 0.5 * mass * radius * radius; - particle->SetMass(mass); - particle->SetMOI(make_float3(moi_radial, moi_radial, moi_axial)); + for (int k = 0; k < 2; k++) { + float x = (i - num_particles_x / 2.0 + 0.5) * particle_spacing + pos_dist(gen); + float y = (j - num_particles_y / 2.0 + 0.5) * particle_spacing + pos_dist(gen); + float z = initial_height + (i + j) * 0.5 + pos_dist(gen) * 2 + k * 2.5; + + // Select mesh type randomly: 0=cube, 1=sphere, 2=cone, 3=cylinder + int mesh_type = mesh_type_dist(gen); + std::shared_ptr particle; + + if (mesh_type == 0) { + // Cube with non-uniform scaling + particle = DEMSim.AddWavefrontMeshObject((GET_DATA_PATH() / "mesh/cube.obj").string(), mat_box); + float scale_x = base_size * scale_dist(gen); + float scale_y = base_size * scale_dist(gen); + float scale_z = base_size * scale_dist(gen); + particle->Scale(make_float3(scale_x, scale_y, scale_z)); // Non-uniform scaling + + // Set mass and MOI for the box (approximate as uniform density) + float mass = 1000.0 * scale_x * scale_y * scale_z; + float moi_x = mass * 
(scale_y * scale_y + scale_z * scale_z) / 12.0; + float moi_y = mass * (scale_x * scale_x + scale_z * scale_z) / 12.0; + float moi_z = mass * (scale_x * scale_x + scale_y * scale_y) / 12.0; + particle->SetMass(mass); + particle->SetMOI(make_float3(moi_x, moi_y, moi_z)); + } else if (mesh_type == 1) { + // Sphere (unit sphere in mesh) + particle = DEMSim.AddWavefrontMeshObject((GET_DATA_PATH() / "mesh/sphere.obj").string(), mat_box); + float scale = base_size * scale_dist(gen); + particle->Scale(scale); + + // Set mass and MOI for sphere + float mass = 1000.0 * (4.0 / 3.0) * math_PI * scale * scale * scale; + float moi = 0.4 * mass * scale * scale; // MOI for sphere + particle->SetMass(mass); + particle->SetMOI(make_float3(moi, moi, moi)); + } else if (mesh_type == 2) { + // Cone (height ~1, radius ~1 in mesh) + particle = DEMSim.AddWavefrontMeshObject((GET_DATA_PATH() / "mesh/cone.obj").string(), mat_box); + // Unit cone's CoM is at this location... + particle->InformCentroidPrincipal(make_float3(0, 0, 3. 
/ 4.), make_float4(0, 0, 0, 1)); + float scale = base_size * scale_dist(gen); + particle->Scale(scale); + + // Set mass and MOI for cone (approximate) + float mass = 1000.0 * (1.0 / 3.0) * math_PI * scale * scale * scale; + float moi_base = 0.3 * mass * scale * scale; // Approximate MOI + float moi_height = 0.15 * mass * scale * scale; + particle->SetMass(mass); + particle->SetMOI(make_float3(moi_base, moi_base, moi_height)); + } else { + // Cylinder (radius ~1, height ~2 in mesh) + particle = + DEMSim.AddWavefrontMeshObject((GET_DATA_PATH() / "mesh/cyl_r1_h2.obj").string(), mat_box); + float scale = base_size * cylinder_scale_factor * scale_dist(gen); + particle->Scale(scale); + + // Set mass and MOI for cylinder + float radius = scale; + float height = 2.0 * scale; + float mass = 1000.0 * math_PI * radius * radius * height; + float moi_radial = mass * (3.0 * radius * radius + height * height) / 12.0; + float moi_axial = 0.5 * mass * radius * radius; + particle->SetMass(mass); + particle->SetMOI(make_float3(moi_radial, moi_radial, moi_axial)); + } + + particle->SetFamily(0); + particle->SetInitPos(make_float3(x, y, z)); + + // Add small initial rotation for more interesting dynamics + particle->SetInitQuat(make_float4(rot_dist(gen), rot_dist(gen), rot_dist(gen), 1.0)); + + auto tracker = DEMSim.Track(particle); + trackers.push_back(tracker); } - - particle->SetFamily(0); - particle->SetInitPos(make_float3(x, y, z)); - - // Add small initial rotation for more interesting dynamics - particle->SetInitQuat(make_float4(rot_dist(gen), rot_dist(gen), rot_dist(gen), 1.0)); - - auto tracker = DEMSim.Track(particle); - trackers.push_back(tracker); } } - float step_time = 1e-5; + float step_time = 5e-6; DEMSim.SetInitTimeStep(step_time); DEMSim.SetGravitationalAcceleration(make_float3(0, 0, -9.81)); DEMSim.SetExpandSafetyType("auto"); From b5893e5a2b6338a18fea32d820b2cb64f573a7e3 Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Tue, 27 Jan 2026 21:42:50 +0100 Subject: 
[PATCH 13/17] Fix merging mistakes, Modified collision Test with STL particles --- src/algorithms/DEMDynamicMisc.cu | 1 - src/algorithms/DEMStaticDeviceSubroutines.h | 1 - src/demo/CMakeLists.txt | 1 - .../ModularTests/DEMTest_SimpleCollisions.cpp | 90 +++++++++++++++---- 4 files changed, 71 insertions(+), 22 deletions(-) diff --git a/src/algorithms/DEMDynamicMisc.cu b/src/algorithms/DEMDynamicMisc.cu index 52e43396..ee3bdc51 100644 --- a/src/algorithms/DEMDynamicMisc.cu +++ b/src/algorithms/DEMDynamicMisc.cu @@ -150,7 +150,6 @@ void prepareWeightedNormalsForVoting(DEMDataDT* granData, float3* weightedNormals, contactPairs_t startOffset, contactPairs_t count, - contact_t contactType, cudaStream_t& this_stream) { size_t blocks_needed = (count + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; if (blocks_needed > 0) { diff --git a/src/algorithms/DEMStaticDeviceSubroutines.h b/src/algorithms/DEMStaticDeviceSubroutines.h index 7eb19461..87cf6685 100644 --- a/src/algorithms/DEMStaticDeviceSubroutines.h +++ b/src/algorithms/DEMStaticDeviceSubroutines.h @@ -248,7 +248,6 @@ void finalizePatchResultsFromAccumulators(const PatchContactAccum* patchAccumula const float3* zeroAreaNormals, const double* zeroAreaPenetrations, const double3* zeroAreaContactPoints, - const notStupidBool_t* patchHasSAT, double* finalAreas, float3* finalNormals, double* finalPenetrations, diff --git a/src/demo/CMakeLists.txt b/src/demo/CMakeLists.txt index fd97e089..70f46bbc 100644 --- a/src/demo/CMakeLists.txt +++ b/src/demo/CMakeLists.txt @@ -24,7 +24,6 @@ SET(DEMOS DEMdemo_TestPack DEMdemo_TestRestart DEMdemo_RotatingDrum - DEMdemo_DrumCubes DEMdemo_Centrifuge DEMdemo_DrumCubes DEMdemo_ResponseAngleMesh diff --git a/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp b/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp index 34d98d64..b8a99a20 100644 --- a/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp +++ b/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp @@ -31,6 +31,10 @@ 
using namespace deme; namespace { +constexpr bool kUseTriangleParticles = true; // toggle to run the STL-based triangle setup +constexpr float kMmToMeters = 0.001f; +constexpr double kTriangleParticleDensity = 2600.0; + constexpr int kNumRuns = 10; constexpr double kGap = 0.01; // 10 mm constexpr double kSpeed = 1.0; // 1 m/s @@ -97,16 +101,12 @@ double compute_min_z_rotated(const std::shared_ptr& mesh, const float4& return min_z; } -std::shared_ptr load_cube_template(DEMSolver& DEMSim, - const std::shared_ptr& mat_type, - bool per_triangle_patches) { - auto mesh_template = DEMSim.LoadMeshType((GET_DATA_PATH() / "mesh/cube.obj").string(), mat_type, - true, // load_normals - false); // load_uv +void assign_patch_ids(const std::shared_ptr& mesh_template, + bool per_triangle_patches, + const std::shared_ptr& mat_type) { if (!mesh_template) { - return nullptr; + return; } - const size_t num_tris = mesh_template->GetNumTriangles(); std::vector patch_ids(num_tris, 0); if (per_triangle_patches) { @@ -115,13 +115,50 @@ std::shared_ptr load_cube_template(DEMSolver& DEMSim, } } mesh_template->SetPatchIDs(patch_ids); - // Ensure material vector matches patch count after overriding patch IDs. 
mesh_template->SetMaterial(mat_type); +} + +std::shared_ptr load_cube_template(DEMSolver& DEMSim, + const std::shared_ptr& mat_type, + bool per_triangle_patches) { + auto mesh_template = DEMSim.LoadMeshType((GET_DATA_PATH() / "mesh/cube.obj").string(), mat_type, + true, // load_normals + false); // load_uv + if (!mesh_template) { + return nullptr; + } + + assign_patch_ids(mesh_template, per_triangle_patches, mat_type); + return mesh_template; +} + +std::shared_ptr load_triangle_template(DEMSolver& DEMSim, + const std::shared_ptr& mat_type, + bool per_triangle_patches, + float& out_mass, + float3& out_moi) { + std::shared_ptr mesh_template = + DEMSim.LoadMeshType((GET_DATA_PATH() / "mesh/simpleTriangleShape4mm.stl").string(), mat_type, true, false); + if (!mesh_template) { + return nullptr; + } + mesh_template->Scale(kMmToMeters); + + double volume = 0.0; + float3 center = make_float3(0, 0, 0); + float3 inertia = make_float3(0, 0, 0); + mesh_template->ComputeMassProperties(volume, center, inertia); + + out_mass = static_cast(volume * kTriangleParticleDensity); + out_moi = inertia * static_cast(kTriangleParticleDensity); + + assign_patch_ids(mesh_template, per_triangle_patches, mat_type); return mesh_template; } RunResult run_single_collision(const float4& init_rot, bool per_triangle_patches, + bool use_triangle_particles, const std::string& label, int run_id) { RunResult result; @@ -139,9 +176,18 @@ RunResult run_single_collision(const float4& init_rot, float3 plane_normal = make_float3(0, 0, 1); auto plane = DEMSim.AddBCPlane(make_float3(0, 0, 0), plane_normal, mat_type); auto plane_tracker = DEMSim.Track(plane); - auto mesh_template = load_cube_template(DEMSim, mat_type, per_triangle_patches); + const char* mesh_desc = use_triangle_particles ? 
"triangle mesh" : "cube mesh"; + auto mesh_template = std::shared_ptr{}; + float particle_mass = 1.0f; + float3 particle_moi = make_float3(1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f); + + if (use_triangle_particles) { + mesh_template = load_triangle_template(DEMSim, mat_type, per_triangle_patches, particle_mass, particle_moi); + } else { + mesh_template = load_cube_template(DEMSim, mat_type, per_triangle_patches); + } if (!mesh_template) { - std::cout << "[" << label << "] Run " << run_id << ": failed to load cube mesh" << std::endl; + std::cout << "[" << label << "] Run " << run_id << ": failed to load " << mesh_desc << std::endl; return result; } double min_z = compute_min_z_rotated(mesh_template, init_rot); @@ -149,8 +195,8 @@ RunResult run_single_collision(const float4& init_rot, auto cube = DEMSim.AddMeshFromTemplate(mesh_template, make_float3(0, 0, 0)); cube->SetFamily(0); - cube->SetMass(1.0); - cube->SetMOI(make_float3(1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0)); + cube->SetMass(particle_mass); + cube->SetMOI(particle_moi); cube->SetInitQuat(init_rot); cube->SetInitPos(make_float3(0, 0, static_cast(init_z))); auto cube_tracker = DEMSim.Track(cube); @@ -251,16 +297,20 @@ float4 corner_quat() { return q; } -void run_scenario(const std::string& label, const float4& rot, bool per_triangle_patches) { +void run_scenario(const std::string& label, + const float4& rot, + bool per_triangle_patches, + bool use_triangle_particles) { std::cout << "\n========================================" << std::endl; std::cout << label << std::endl; std::cout << "========================================" << std::endl; + std::cout << "Using mesh: " << (use_triangle_particles ? 
"simpleTriangleShape4mm.stl" : "cube.obj") << std::endl; std::vector results; results.reserve(kNumRuns); for (int i = 0; i < kNumRuns; ++i) { - RunResult r = run_single_collision(rot, per_triangle_patches, label, i); + RunResult r = run_single_collision(rot, per_triangle_patches, use_triangle_particles, label, i); results.push_back(r); if (r.ok) { std::cout << "Run " << i << ": speed=" << r.rebound_speed << " dir=(" << r.rebound_dir.x << ", " @@ -278,14 +328,16 @@ int main() { std::cout << "========================================" << std::endl; std::cout << "DEM Simple Collisions Test" << std::endl; std::cout << "========================================" << std::endl; + std::cout << "Particle mesh mode: " + << (kUseTriangleParticles ? "simpleTriangleShape4mm.stl" : "cube.obj") << std::endl; float4 q_edge = edge_quat(); float4 q_corner = corner_quat(); - run_scenario("Edge impact - single patch", q_edge, false); - run_scenario("Edge impact - 12 patches", q_edge, true); - run_scenario("Corner impact - single patch", q_corner, false); - run_scenario("Corner impact - 12 patches", q_corner, true); + run_scenario("Edge impact - single patch", q_edge, false, kUseTriangleParticles); + run_scenario("Edge impact - 12 patches", q_edge, true, kUseTriangleParticles); + run_scenario("Corner impact - single patch", q_corner, false, kUseTriangleParticles); + run_scenario("Corner impact - 12 patches", q_corner, true, kUseTriangleParticles); std::cout << "\n========================================" << std::endl; std::cout << "Test completed" << std::endl; From cde63ee3e6111801fb8516c698861dd67a0bfca1 Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Wed, 28 Jan 2026 11:14:53 +0100 Subject: [PATCH 14/17] Fix planar contact bug, fixed isWatertight check, more debug info test collision --- src/DEM/MeshUtils.cpp | 146 ++++++++++++++---- .../ModularTests/DEMTest_SimpleCollisions.cpp | 126 ++++++++++++++- src/kernel/DEMCalcForceKernels_Primitive.cu | 4 + 3 files changed, 248 
insertions(+), 28 deletions(-) diff --git a/src/DEM/MeshUtils.cpp b/src/DEM/MeshUtils.cpp index f16b958c..daee8c4d 100644 --- a/src/DEM/MeshUtils.cpp +++ b/src/DEM/MeshUtils.cpp @@ -1734,45 +1734,137 @@ void DEMMesh::ComputeMassProperties(double& volume, float3& center, float3& iner inertia = make_float3(static_cast(Ixx), static_cast(Iyy), static_cast(Izz)); } -bool DEMMesh::IsWatertight(size_t* boundary_edges, size_t* nonmanifold_edges) const { - if (boundary_edges) { - *boundary_edges = 0; - } - if (nonmanifold_edges) { - *nonmanifold_edges = 0; +// Section for Watertight test, false if not + +struct QuantKey3 { + int64_t x, y, z; + bool operator==(const QuantKey3& o) const noexcept { return x==o.x && y==o.y && z==o.z; } +}; +struct QuantKey3Hash { + size_t operator()(const QuantKey3& k) const noexcept { + size_t h1 = std::hash{}(k.x); + size_t h2 = std::hash{}(k.y); + size_t h3 = std::hash{}(k.z); + size_t h = h1; + h ^= h2 + 0x9e3779b97f4a7c15ULL + (h<<6) + (h>>2); + h ^= h3 + 0x9e3779b97f4a7c15ULL + (h<<6) + (h>>2); + return h; } - if (m_face_v_indices.empty()) { +}; + +static inline int64_t q(double v, double eps) { + return (int64_t)std::llround(v / eps); +} + +bool DEMMesh::IsWatertight(size_t* boundary_edges, size_t* nonmanifold_edges) const { + if (boundary_edges) *boundary_edges = 0; + if (nonmanifold_edges) *nonmanifold_edges = 0; + if (m_face_v_indices.empty()) return true; + + auto count_edges_by_index = [&](size_t& boundary, size_t& nonmanifold) { + std::map, size_t> edge_counts; + + for (const auto& face : m_face_v_indices) { + const int fx = face.x, fy = face.y, fz = face.z; + if (fx < 0 || fy < 0 || fz < 0) continue; + + const size_t a = (size_t)fx, b = (size_t)fy, c = (size_t)fz; + if (a == b || b == c || c == a) continue; + + std::pair edges[3] = { + {std::min(a,b), std::max(a,b)}, + {std::min(b,c), std::max(b,c)}, + {std::min(c,a), std::max(c,a)} + }; + edge_counts[edges[0]]++; + edge_counts[edges[1]]++; + edge_counts[edges[2]]++; + } + + 
boundary = 0; nonmanifold = 0; + for (const auto& kv : edge_counts) { + if (kv.second == 1) boundary++; + else if (kv.second > 2) nonmanifold++; + } + }; + + size_t boundary1 = 0, nonmanifold1 = 0; + count_edges_by_index(boundary1, nonmanifold1); + + if (boundary1 == 0 && nonmanifold1 == 0) { + if (boundary_edges) *boundary_edges = 0; + if (nonmanifold_edges) *nonmanifold_edges = 0; return true; } - std::map, size_t> edge_counts; - for (const auto& face : m_face_v_indices) { - std::pair edges[3] = {{std::min(face.x, face.y), std::max(face.x, face.y)}, - {std::min(face.y, face.z), std::max(face.y, face.z)}, - {std::min(face.z, face.x), std::max(face.z, face.x)}}; - for (int e = 0; e < 3; ++e) { - edge_counts[edges[e]]++; - } + if (m_vertices.empty()) { + if (boundary_edges) *boundary_edges = boundary1; + if (nonmanifold_edges) *nonmanifold_edges = nonmanifold1; + return false; } - size_t boundary = 0; - size_t nonmanifold = 0; - for (const auto& kv : edge_counts) { - if (kv.second == 1) { - boundary++; - } else if (kv.second > 2) { - nonmanifold++; + double minx = m_vertices[0].x, miny = m_vertices[0].y, minz = m_vertices[0].z; + double maxx = minx, maxy = miny, maxz = minz; + for (const auto& v : m_vertices) { + minx = std::min(minx, (double)v.x); miny = std::min(miny, (double)v.y); + minz = std::min(minz, (double)v.z); + maxx = std::max(maxx, (double)v.x); maxy = std::max(maxy, (double)v.y); + maxz = std::max(maxz, (double)v.z); + } + const double dx = maxx - minx, dy = maxy - miny, dz = maxz - minz; + const double diag = std::sqrt(dx*dx + dy*dy + dz*dz); + const double eps = std::max(diag * 1e-9, 1e-12); + + std::unordered_map rep; + rep.reserve(m_vertices.size()); + + std::vector canon(m_vertices.size(), (size_t)-1); + size_t next_id = 0; + + for (size_t i = 0; i < m_vertices.size(); ++i) { + const auto& v = m_vertices[i]; + QuantKey3 key{ q(v.x, eps), q(v.y, eps), q(v.z, eps) }; + + auto it = rep.find(key); + if (it == rep.end()) { + rep.emplace(key, next_id); 
+ canon[i] = next_id; + next_id++; + } else { + canon[i] = it->second; } } - if (boundary_edges) { - *boundary_edges = boundary; + std::map, size_t> edge_counts2; + for (const auto& face : m_face_v_indices) { + const int fx = face.x, fy = face.y, fz = face.z; + if (fx < 0 || fy < 0 || fz < 0) continue; + + const size_t a0 = (size_t)fx, b0 = (size_t)fy, c0 = (size_t)fz; + if (a0 >= canon.size() || b0 >= canon.size() || c0 >= canon.size()) continue; + + const size_t a = canon[a0], b = canon[b0], c = canon[c0]; + if (a == b || b == c || c == a) continue; + + std::pair edges[3] = { + {std::min(a,b), std::max(a,b)}, + {std::min(b,c), std::max(b,c)}, + {std::min(c,a), std::max(c,a)} + }; + edge_counts2[edges[0]]++; + edge_counts2[edges[1]]++; + edge_counts2[edges[2]]++; } - if (nonmanifold_edges) { - *nonmanifold_edges = nonmanifold; + + size_t boundary2 = 0, nonmanifold2 = 0; + for (const auto& kv : edge_counts2) { + if (kv.second == 1) boundary2++; + else if (kv.second > 2) nonmanifold2++; } - return boundary == 0 && nonmanifold == 0; + if (boundary_edges) *boundary_edges = boundary2; + if (nonmanifold_edges) *nonmanifold_edges = nonmanifold2; + return boundary2 == 0 && nonmanifold2 == 0; } } // end namespace deme diff --git a/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp b/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp index b8a99a20..0662f106 100644 --- a/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp +++ b/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp @@ -21,17 +21,21 @@ #include #include +#include +#include #include #include #include #include +#include #include using namespace deme; +using namespace std::filesystem; namespace { -constexpr bool kUseTriangleParticles = true; // toggle to run the STL-based triangle setup +constexpr bool kUseTriangleParticles = false; // toggle to run the STL-based triangle setup constexpr float kMmToMeters = 0.001f; constexpr double kTriangleParticleDensity = 2600.0; @@ -41,6 +45,10 @@ constexpr double kSpeed = 1.0; 
// 1 m/s constexpr double kTimeStep = 1e-5; // seconds constexpr int kMaxSteps = 200000; // 2 seconds max constexpr double kContactEps = 1e-6; // contact force threshold +constexpr bool kFixWinding = true; // flip inward-facing triangles based on CoM +constexpr bool kWriteFrames = true; +constexpr unsigned int kOutputFPS = 2000; +constexpr const char* kOutputDir = "DemoOutput_SimpleCollisions"; struct RunResult { bool ok = false; @@ -68,6 +76,88 @@ float3 vec_scale(const float3& v, double s) { return make_float3(v.x * s, v.y * s, v.z * s); } +std::pair compute_bounds(const std::vector& vertices) { + if (vertices.empty()) { + return {make_float3(0, 0, 0), make_float3(0, 0, 0)}; + } + float3 min_v = vertices.front(); + float3 max_v = vertices.front(); + for (const auto& v : vertices) { + min_v.x = std::min(min_v.x, v.x); + min_v.y = std::min(min_v.y, v.y); + min_v.z = std::min(min_v.z, v.z); + max_v.x = std::max(max_v.x, v.x); + max_v.y = std::max(max_v.y, v.y); + max_v.z = std::max(max_v.z, v.z); + } + return {min_v, max_v}; +} + +void print_mesh_diagnostics(const std::shared_ptr& mesh, const std::string& label) { + if (!mesh) { + return; + } + size_t boundary_edges = 0; + size_t nonmanifold_edges = 0; + bool watertight = mesh->IsWatertight(&boundary_edges, &nonmanifold_edges); + + double volume = 0.0; + float3 center = make_float3(0, 0, 0); + float3 inertia = make_float3(0, 0, 0); + mesh->ComputeMassProperties(volume, center, inertia); + + auto [min_v, max_v] = compute_bounds(mesh->GetCoordsVertices()); + float3 dims = max_v - min_v; + + std::cout << "\n[" << label << "] mesh diagnostics" << std::endl; + std::cout << "Vertices: " << mesh->GetNumNodes() << " Triangles: " << mesh->GetNumTriangles() << std::endl; + std::cout << "Bounds min=(" << min_v.x << ", " << min_v.y << ", " << min_v.z << ") max=(" << max_v.x << ", " + << max_v.y << ", " << max_v.z << ") dims=(" << dims.x << ", " << dims.y << ", " << dims.z << ")" + << std::endl; + std::cout << "Watertight: " << 
(watertight ? "yes" : "no") << " boundary_edges=" << boundary_edges + << " nonmanifold_edges=" << nonmanifold_edges << std::endl; + std::cout << "Volume=" << volume << " CoM=(" << center.x << ", " << center.y << ", " << center.z + << ") MOI(unit density, CoM)=(" << inertia.x << ", " << inertia.y << ", " << inertia.z << ")" + << std::endl; +} + +void diagnose_winding(const std::shared_ptr& mesh, + const std::string& label, + bool fix_winding) { + if (!mesh || mesh->m_face_v_indices.empty()) { + return; + } + double volume = 0.0; + float3 center = make_float3(0, 0, 0); + float3 inertia = make_float3(0, 0, 0); + mesh->ComputeMassProperties(volume, center, inertia); + if (volume == 0.0) { + center = make_float3(0, 0, 0); + } + + size_t inward = 0; + for (size_t i = 0; i < mesh->m_face_v_indices.size(); ++i) { + const int3& f = mesh->m_face_v_indices[i]; + const float3& v0 = mesh->m_vertices[f.x]; + const float3& v1 = mesh->m_vertices[f.y]; + const float3& v2 = mesh->m_vertices[f.z]; + const float3 n = face_normal(v0, v1, v2); + const float3 centroid = (v0 + v1 + v2) / 3.0f; + const float3 to_face = centroid - center; + const float d = dot(n, to_face); + if (d < 0.0f) { + inward++; + if (fix_winding) { + mesh->m_face_v_indices[i] = make_int3(f.x, f.z, f.y); + } + } + } + + std::cout << "\n[" << label << "] winding diagnostics" << std::endl; + std::cout << "Faces total=" << mesh->m_face_v_indices.size() << " inward=" << inward + << (fix_winding ? 
" (flipped)" : "") << std::endl; +} + Stats calc_stats(const std::vector& values) { Stats s; if (values.empty()) { @@ -152,6 +242,18 @@ std::shared_ptr load_triangle_template(DEMSolver& DEMSim, out_mass = static_cast(volume * kTriangleParticleDensity); out_moi = inertia * static_cast(kTriangleParticleDensity); + print_mesh_diagnostics(mesh_template, "simpleTriangleShape4mm.stl (scaled)"); + diagnose_winding(mesh_template, "simpleTriangleShape4mm.stl (scaled)", kFixWinding); + if (center.x != 0.0f || center.y != 0.0f || center.z != 0.0f) { + for (auto& v : mesh_template->m_vertices) { + v.x -= center.x; + v.y -= center.y; + v.z -= center.z; + } + std::cout << "[simpleTriangleShape4mm.stl] shifted vertices to CoM frame (" + << center.x << ", " << center.y << ", " << center.z << ")" << std::endl; + } + assign_patch_ids(mesh_template, per_triangle_patches, mat_type); return mesh_template; } @@ -165,6 +267,7 @@ RunResult run_single_collision(const float4& init_rot, DEMSolver DEMSim; DEMSim.SetOutputFormat(OUTPUT_FORMAT::CSV); + DEMSim.SetMeshOutputFormat("VTK"); DEMSim.InstructBoxDomainDimension(5, 5, 5); DEMSim.SetGravitationalAcceleration(make_float3(0, 0, 0)); DEMSim.SetCDUpdateFreq(0); @@ -208,10 +311,31 @@ RunResult run_single_collision(const float4& init_rot, bool contact_started = false; bool rebound_captured = false; double peak_normal_force = 0.0; + unsigned int frame_id = 0; + double next_frame_time = 0.0; + path out_dir; + if (kWriteFrames) { + out_dir = current_path() / kOutputDir / label / ("run_" + std::to_string(run_id)); + create_directories(out_dir); + next_frame_time = 0.0; + char filename[128]; + std::snprintf(filename, sizeof(filename), "frame_%06u.vtk", frame_id++); + DEMSim.WriteMeshFile(out_dir / filename); + } for (int step = 0; step < kMaxSteps; ++step) { DEMSim.DoStepDynamics(); + if (kWriteFrames) { + double sim_time = DEMSim.GetSimTime(); + while (sim_time + 1e-12 >= next_frame_time) { + char filename[128]; + std::snprintf(filename, 
sizeof(filename), "frame_%06u.vtk", frame_id++); + DEMSim.WriteMeshFile(out_dir / filename); + next_frame_time += 1.0 / static_cast(kOutputFPS); + } + } + float3 plane_force = plane_tracker->ContactAcc(); plane_force = vec_scale(plane_force, plane_tracker->Mass()); double normal_force = std::abs(vec_dot(plane_force, plane_normal)); diff --git a/src/kernel/DEMCalcForceKernels_Primitive.cu b/src/kernel/DEMCalcForceKernels_Primitive.cu index 43fbe269..a7261b7e 100644 --- a/src/kernel/DEMCalcForceKernels_Primitive.cu +++ b/src/kernel/DEMCalcForceKernels_Primitive.cu @@ -59,6 +59,10 @@ __device__ __forceinline__ void calculatePrimitiveContactForces_impl(deme::DEMSi // resulting into the correct place needs to be done here. deme::contactPairs_t myPatchContactID = granData->geomToPatchMap[myPrimitiveContactID]; + // Default: patch-direction check should not filter non-tri-tri contacts. + // Tri-tri will overwrite this after computing patch direction. + granData->contactPatchDirectionRespected[myPrimitiveContactID] = 1; + // ---------------------------------------------------------------- // Based on A's type, equip info // ---------------------------------------------------------------- From 5232ad04c9005239d8f9ff6eae2a414a97c566ec Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Thu, 29 Jan 2026 16:56:22 +0100 Subject: [PATCH 15/17] Fixed and improved demo --- src/demo/DEMdemo_ResponseAngleMesh.cpp | 117 +++++++-------- .../ModularTests/DEMTest_SimpleCollisions.cpp | 135 +----------------- 2 files changed, 65 insertions(+), 187 deletions(-) diff --git a/src/demo/DEMdemo_ResponseAngleMesh.cpp b/src/demo/DEMdemo_ResponseAngleMesh.cpp index 112e5a64..f5064ea8 100644 --- a/src/demo/DEMdemo_ResponseAngleMesh.cpp +++ b/src/demo/DEMdemo_ResponseAngleMesh.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -27,34 +28,34 @@ using namespace std::filesystem; namespace { -/// Load an STL mesh, scale it, attach material and register it as a 
template. -std::shared_ptr LoadStlTemplate(DEMSolver& sim, - const path& file, - const std::shared_ptr& mat, - float scale) { - DEMMesh mesh; - bool ok = mesh.LoadSTLMesh(file.string()); - if (!ok) { - DEME_ERROR("Failed to load STL mesh template %s", file.string().c_str()); +std::string ToLower(std::string s) { + for (char& c : s) { + c = static_cast(std::tolower(static_cast(c))); } - mesh.SetMaterial(mat); - mesh.Scale(scale); - return sim.LoadMeshType(mesh); + return s; } -/// Load an STL mesh, scale it, attach material and place it directly in the scene. -std::shared_ptr LoadStlMesh(DEMSolver& sim, - const path& file, - const std::shared_ptr& mat, - float scale) { +/// Load a mesh (STL or OBJ), scale it, attach material and register it as a template. +std::shared_ptr LoadMeshTemplate(DEMSolver& sim, + const path& file, + const std::shared_ptr& mat, + float scale) { DEMMesh mesh; - bool ok = mesh.LoadSTLMesh(file.string()); + std::string ext = ToLower(file.extension().string()); + bool ok = false; + if (ext == ".stl") { + ok = mesh.LoadSTLMesh(file.string()); + } else if (ext == ".obj") { + ok = mesh.LoadWavefrontMesh(file.string()); + } else { + DEME_ERROR("Unsupported mesh format: %s (only .stl or .obj)", ext.c_str()); + } if (!ok) { - DEME_ERROR("Failed to load STL mesh %s", file.string().c_str()); + DEME_ERROR("Failed to load mesh template %s", file.string().c_str()); } mesh.SetMaterial(mat); mesh.Scale(scale); - return sim.AddMesh(mesh); + return sim.LoadMeshType(mesh); } std::pair ComputeBounds(const std::vector& vertices) { @@ -82,18 +83,23 @@ int main() { DEMSim.SetMeshUniversalContact(true); const float mm_to_m = 0.001f; const float drum_inner_radius = 0.1f; // 200 mm diameter - const float wall_clearance = 0.002f; // leave a small gap to the mantle + const float wall_clearance = 0.001f; // leave a small gap to the mantle const float rpm = 40.0f; const float drum_ang_vel = rpm * 2.0f * PI / 60.0f; auto mat_type_particle = - DEMSim.LoadMaterial({{"E", 
1e6}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.5}, {"Crr", 0.01}}); - auto mat_type_drum = DEMSim.LoadMaterial({{"E", 2e6}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.5}, {"Crr", 0.01}}); + DEMSim.LoadMaterial({{"E", 1e6}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.5}, {"Crr", 0.00}}); + auto mat_type_drum = DEMSim.LoadMaterial({{"E", 2e6}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.5}, {"Crr", 0.00}}); DEMSim.SetMaterialPropertyPair("mu", mat_type_particle, mat_type_drum, 0.5); - // Load particle mesh template from STL (approx. 4 mm triangular prism) - path tri_path = GET_DATA_PATH() / "mesh" / "simpleTriangleShape4mm.stl"; - auto tri_template = LoadStlTemplate(DEMSim, tri_path, mat_type_particle, mm_to_m); + // --------------------- Particle settings block --------------------- + // Mesh file can be .stl or .obj (path is relative to data/mesh). + const path particle_mesh_file = GET_DATA_PATH() / "mesh" / "cube.obj"; // "simpleTriangleShape4mm.stl" + const float particle_mesh_scale = mm_to_m * 5.0f; // 1.0f for STLs in mm size + const unsigned int target_particles = 5000; + // ------------------------------------------------------------------- + + auto tri_template = LoadMeshTemplate(DEMSim, particle_mesh_file, mat_type_particle, particle_mesh_scale); auto [tri_min, tri_max] = ComputeBounds(tri_template->GetCoordsVertices()); const float3 tri_dims = tri_max - tri_min; const float tri_diag = std::sqrt(tri_dims.x * tri_dims.x + tri_dims.y * tri_dims.y + tri_dims.z * tri_dims.z); @@ -105,50 +111,41 @@ int main() { tri_template->ComputeMassProperties(tri_volume, tri_center, tri_inertia); const float particle_mass = static_cast(tri_volume * particle_density); const float3 particle_moi = tri_inertia * particle_density; - std::cout << "Particle STL volume (m^3): " << tri_volume << std::endl; - std::cout << "Particle STL MOI (unit density, CoM): " << tri_inertia.x << ", " << tri_inertia.y << ", " + std::cout << "Particle volume (m^3): " << tri_volume << ", mass (kg): "<< particle_mass << 
std::endl; + std::cout << "Particle MOI (unit density, CoM): " << tri_inertia.x << ", " << tri_inertia.y << ", " << tri_inertia.z << std::endl; const double cube_vol = std::pow(4.0e-3, 3); - std::cout << "Particle mass (kg): " << particle_mass << std::endl; - // Load drum mantle from STL; STL units are mm with z in [0, 100] - path drum_path = GET_DATA_PATH() / "mesh" / "drum.stl"; - auto drum_mesh = LoadStlMesh(DEMSim, drum_path, mat_type_drum, mm_to_m); - auto [drum_min, drum_max] = ComputeBounds(drum_mesh->GetCoordsVertices()); - const float drum_height = drum_max.z - drum_min.z; + // Analytical drum mantle (planar contact cylinder) with end caps. + const float drum_height = 0.1f; + const float drum_mass = 1.0f; + const float IZZ = drum_mass * drum_inner_radius * drum_inner_radius / 2.0f; + const float IYY = (drum_mass / 12.0f) * (3.0f * drum_inner_radius * drum_inner_radius + drum_height * drum_height); unsigned int drum_family = 100; - drum_mesh->SetFamily(drum_family); - const float drum_density = 2600.0f; - double drum_volume = 0.0; - float3 drum_center = make_float3(0, 0, 0); - float3 drum_inertia = make_float3(0, 0, 0); - drum_mesh->ComputeMassProperties(drum_volume, drum_center, drum_inertia); - const float drum_mass = static_cast(drum_volume * drum_density); - drum_mesh->SetMass(drum_mass); - drum_mesh->SetMOI(drum_inertia * drum_density); - std::cout << "Drum STL volume (m^3): " << drum_volume << std::endl; - std::cout << "Drum STL MOI (unit density, CoM): " << drum_inertia.x << ", " << drum_inertia.y << ", " - << drum_inertia.z << std::endl; - std::cout << "Drum mass (kg): " << drum_mass << std::endl; + + auto drum = DEMSim.AddExternalObject(); + drum->AddPlanarContactCylinder(make_float3(0, 0, drum_height / 2.0f), make_float3(0, 0, 1), drum_inner_radius, + mat_type_drum, ENTITY_NORMAL_INWARD); + drum->SetFamily(drum_family); + drum->SetMass(drum_mass); + drum->SetMOI(make_float3(IYY, IYY, IZZ)); DEMSim.SetFamilyPrescribedAngVel(drum_family, "0", "0", 
to_string_with_precision(drum_ang_vel)); - // Add top and bottom planes at z = 0 and z = 0.1 m. They rotate with the drum family (axis-aligned so rotation - // does not change their normals). + // Add top and bottom planes at z = 0 and z = drum_height. They rotate with the drum family. auto end_caps = DEMSim.AddExternalObject(); - end_caps->AddPlane(make_float3(0, 0, drum_max.z), make_float3(0, 0, -1), mat_type_drum); - end_caps->AddPlane(make_float3(0, 0, drum_min.z), make_float3(0, 0, 1), mat_type_drum); + end_caps->AddPlane(make_float3(0, 0, drum_height), make_float3(0, 0, -1), mat_type_drum); + end_caps->AddPlane(make_float3(0, 0, 0), make_float3(0, 0, 1), mat_type_drum); end_caps->SetFamily(drum_family); - auto drum_tracker = DEMSim.Track(drum_mesh); + auto drum_tracker = DEMSim.Track(drum); auto cap_tracker = DEMSim.Track(end_caps); - // Sample 5000 particles inside the cylindrical volume with a small wall clearance. - const unsigned int target_particles = 5000; + // Sample particles inside the cylindrical volume with a small wall clearance. const float sample_radius = drum_inner_radius - wall_clearance - tri_radius; const float sample_halfheight = drum_height / 2.0f - wall_clearance - tri_radius; - HCPSampler sampler(tri_diag * 1.05f); + HCPSampler sampler(tri_diag * 1.01f); auto candidate_pos = - sampler.SampleCylinderZ(make_float3(0, 0, drum_min.z + drum_height / 2.0f), sample_radius, sample_halfheight); + sampler.SampleCylinderZ(make_float3(0, 0, drum_height / 2.0f), sample_radius, sample_halfheight); if (candidate_pos.size() < target_particles) { DEME_WARNING("Sampler produced fewer points (%zu) than requested (%u). Using all generated points.", candidate_pos.size(), target_particles); @@ -166,7 +163,8 @@ int main() { tri->SetMOI(particle_moi); tri->SetInitQuat(make_float4(0.f, 0.f, 0.f, 1.0f)); } - std::cout << "Placed " << candidate_pos.size() << " STL particles inside the drum." 
<< std::endl; + const float total_particle_mass = particle_mass * candidate_pos.size(); + std::cout << "Placed " << candidate_pos.size() << " particles with a mass of "<< total_particle_mass <<" kg inside the drum." < vmax_rot) ? vmax_grav : vmax_rot; + DEMSim.SetExpandSafetyAdder(vmax); DEMSim.Initialize(); path out_dir = current_path(); diff --git a/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp b/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp index 0662f106..4722c916 100644 --- a/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp +++ b/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp @@ -21,34 +21,27 @@ #include #include -#include -#include #include #include #include #include -#include #include using namespace deme; -using namespace std::filesystem; namespace { -constexpr bool kUseTriangleParticles = false; // toggle to run the STL-based triangle setup +constexpr bool kUseTriangleParticles = true; // toggle to run the STL-based triangle setup constexpr float kMmToMeters = 0.001f; constexpr double kTriangleParticleDensity = 2600.0; constexpr int kNumRuns = 10; -constexpr double kGap = 0.01; // 10 mm +constexpr double kGap = 0.005; // 0.5 mm constexpr double kSpeed = 1.0; // 1 m/s constexpr double kTimeStep = 1e-5; // seconds -constexpr int kMaxSteps = 200000; // 2 seconds max +constexpr int kMaxSteps = 100000; // 1 seconds max constexpr double kContactEps = 1e-6; // contact force threshold -constexpr bool kFixWinding = true; // flip inward-facing triangles based on CoM -constexpr bool kWriteFrames = true; -constexpr unsigned int kOutputFPS = 2000; -constexpr const char* kOutputDir = "DemoOutput_SimpleCollisions"; +double vmax = kSpeed; struct RunResult { bool ok = false; @@ -76,88 +69,6 @@ float3 vec_scale(const float3& v, double s) { return make_float3(v.x * s, v.y * s, v.z * s); } -std::pair compute_bounds(const std::vector& vertices) { - if (vertices.empty()) { - return {make_float3(0, 0, 0), make_float3(0, 0, 0)}; - } - float3 min_v = 
vertices.front(); - float3 max_v = vertices.front(); - for (const auto& v : vertices) { - min_v.x = std::min(min_v.x, v.x); - min_v.y = std::min(min_v.y, v.y); - min_v.z = std::min(min_v.z, v.z); - max_v.x = std::max(max_v.x, v.x); - max_v.y = std::max(max_v.y, v.y); - max_v.z = std::max(max_v.z, v.z); - } - return {min_v, max_v}; -} - -void print_mesh_diagnostics(const std::shared_ptr& mesh, const std::string& label) { - if (!mesh) { - return; - } - size_t boundary_edges = 0; - size_t nonmanifold_edges = 0; - bool watertight = mesh->IsWatertight(&boundary_edges, &nonmanifold_edges); - - double volume = 0.0; - float3 center = make_float3(0, 0, 0); - float3 inertia = make_float3(0, 0, 0); - mesh->ComputeMassProperties(volume, center, inertia); - - auto [min_v, max_v] = compute_bounds(mesh->GetCoordsVertices()); - float3 dims = max_v - min_v; - - std::cout << "\n[" << label << "] mesh diagnostics" << std::endl; - std::cout << "Vertices: " << mesh->GetNumNodes() << " Triangles: " << mesh->GetNumTriangles() << std::endl; - std::cout << "Bounds min=(" << min_v.x << ", " << min_v.y << ", " << min_v.z << ") max=(" << max_v.x << ", " - << max_v.y << ", " << max_v.z << ") dims=(" << dims.x << ", " << dims.y << ", " << dims.z << ")" - << std::endl; - std::cout << "Watertight: " << (watertight ? 
"yes" : "no") << " boundary_edges=" << boundary_edges - << " nonmanifold_edges=" << nonmanifold_edges << std::endl; - std::cout << "Volume=" << volume << " CoM=(" << center.x << ", " << center.y << ", " << center.z - << ") MOI(unit density, CoM)=(" << inertia.x << ", " << inertia.y << ", " << inertia.z << ")" - << std::endl; -} - -void diagnose_winding(const std::shared_ptr& mesh, - const std::string& label, - bool fix_winding) { - if (!mesh || mesh->m_face_v_indices.empty()) { - return; - } - double volume = 0.0; - float3 center = make_float3(0, 0, 0); - float3 inertia = make_float3(0, 0, 0); - mesh->ComputeMassProperties(volume, center, inertia); - if (volume == 0.0) { - center = make_float3(0, 0, 0); - } - - size_t inward = 0; - for (size_t i = 0; i < mesh->m_face_v_indices.size(); ++i) { - const int3& f = mesh->m_face_v_indices[i]; - const float3& v0 = mesh->m_vertices[f.x]; - const float3& v1 = mesh->m_vertices[f.y]; - const float3& v2 = mesh->m_vertices[f.z]; - const float3 n = face_normal(v0, v1, v2); - const float3 centroid = (v0 + v1 + v2) / 3.0f; - const float3 to_face = centroid - center; - const float d = dot(n, to_face); - if (d < 0.0f) { - inward++; - if (fix_winding) { - mesh->m_face_v_indices[i] = make_int3(f.x, f.z, f.y); - } - } - } - - std::cout << "\n[" << label << "] winding diagnostics" << std::endl; - std::cout << "Faces total=" << mesh->m_face_v_indices.size() << " inward=" << inward - << (fix_winding ? 
" (flipped)" : "") << std::endl; -} - Stats calc_stats(const std::vector& values) { Stats s; if (values.empty()) { @@ -242,18 +153,6 @@ std::shared_ptr load_triangle_template(DEMSolver& DEMSim, out_mass = static_cast(volume * kTriangleParticleDensity); out_moi = inertia * static_cast(kTriangleParticleDensity); - print_mesh_diagnostics(mesh_template, "simpleTriangleShape4mm.stl (scaled)"); - diagnose_winding(mesh_template, "simpleTriangleShape4mm.stl (scaled)", kFixWinding); - if (center.x != 0.0f || center.y != 0.0f || center.z != 0.0f) { - for (auto& v : mesh_template->m_vertices) { - v.x -= center.x; - v.y -= center.y; - v.z -= center.z; - } - std::cout << "[simpleTriangleShape4mm.stl] shifted vertices to CoM frame (" - << center.x << ", " << center.y << ", " << center.z << ")" << std::endl; - } - assign_patch_ids(mesh_template, per_triangle_patches, mat_type); return mesh_template; } @@ -267,12 +166,11 @@ RunResult run_single_collision(const float4& init_rot, DEMSolver DEMSim; DEMSim.SetOutputFormat(OUTPUT_FORMAT::CSV); - DEMSim.SetMeshOutputFormat("VTK"); DEMSim.InstructBoxDomainDimension(5, 5, 5); DEMSim.SetGravitationalAcceleration(make_float3(0, 0, 0)); - DEMSim.SetCDUpdateFreq(0); - DEMSim.UseAdaptiveUpdateFreq(false); DEMSim.SetMeshUniversalContact(true); + DEMSim.SetExpandSafetyType("auto"); + DEMSim.SetExpandSafetyAdder(vmax); auto mat_type = DEMSim.LoadMaterial({{"E", 1e9}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.5}, {"Crr", 0.00}}); @@ -311,31 +209,10 @@ RunResult run_single_collision(const float4& init_rot, bool contact_started = false; bool rebound_captured = false; double peak_normal_force = 0.0; - unsigned int frame_id = 0; - double next_frame_time = 0.0; - path out_dir; - if (kWriteFrames) { - out_dir = current_path() / kOutputDir / label / ("run_" + std::to_string(run_id)); - create_directories(out_dir); - next_frame_time = 0.0; - char filename[128]; - std::snprintf(filename, sizeof(filename), "frame_%06u.vtk", frame_id++); - 
DEMSim.WriteMeshFile(out_dir / filename); - } for (int step = 0; step < kMaxSteps; ++step) { DEMSim.DoStepDynamics(); - if (kWriteFrames) { - double sim_time = DEMSim.GetSimTime(); - while (sim_time + 1e-12 >= next_frame_time) { - char filename[128]; - std::snprintf(filename, sizeof(filename), "frame_%06u.vtk", frame_id++); - DEMSim.WriteMeshFile(out_dir / filename); - next_frame_time += 1.0 / static_cast(kOutputFPS); - } - } - float3 plane_force = plane_tracker->ContactAcc(); plane_force = vec_scale(plane_force, plane_tracker->Mass()); double normal_force = std::abs(vec_dot(plane_force, plane_normal)); From a6d960705b53d7e1efba7a9384b41ded6c827d9a Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Thu, 29 Jan 2026 23:26:26 +0100 Subject: [PATCH 16/17] Switch to multi contact concept for convave shapes (default) - tri edge neighbors for contact islands - finer contact mesh is "winner" and defines the islands - convace demo change (cross.stl) to check function --- data/mesh/cross.stl | Bin 0 -> 2284 bytes data/mesh/cross_fine.stl | Bin 0 -> 17684 bytes src/DEM/API.h | 6 + src/DEM/APIPrivate.cpp | 180 ++++++- src/DEM/APIPublic.cpp | 5 + src/DEM/BdrsAndObjs.h | 12 + src/DEM/Defines.h | 14 + src/DEM/dT.cpp | 85 ++- src/DEM/dT.h | 27 + src/DEM/kT.cpp | 78 ++- src/DEM/kT.h | 28 + src/algorithms/DEMContactDetection.cu | 491 +++++++++++++++++- src/algorithms/DEMContactDetectionKernels.cuh | 331 +++++++++++- src/algorithms/DEMStaticDeviceSubroutines.h | 3 + src/demo/DEMdemo_DrumCubes.cpp | 2 + src/demo/DEMdemo_ResponseAngleMesh.cpp | 33 +- 16 files changed, 1218 insertions(+), 77 deletions(-) create mode 100644 data/mesh/cross.stl create mode 100644 data/mesh/cross_fine.stl diff --git a/data/mesh/cross.stl b/data/mesh/cross.stl new file mode 100644 index 0000000000000000000000000000000000000000..a0c19126e2d897ceea6d5a1df376d319e270105a GIT binary patch literal 2284 zcma)7OKwv^5S*jTg0PCXLYf58fEkGsP;e>iNP&lhJM 
z?=Ro(PP_c+>m@&2wr`S*>x!qJDgFM@!nvgM^pcX##Ma+4a7t86V}%nB10VpIczeIx6z#;i^P`;4dd#=KTUhOE*Qu^!iSS5I$y4`LGqa2vi~ zKD;g+1uk$Tm71&fuR)*Y%*?DT$9Sj!76q_oh|5?BHC{E9_o%LH;SBHEPZVj)kX6P? zbA7O*2i@3MA^M8~qKFQ2Y4NC((yuF72eOgzU8XbZSQJTWf#H2t>H<=~?Z7-eWQeWd z=qHM4%+Qspf&1_~tabl&BcYg@$`$%;s_KF&HdbnE|E!9*abSh-VW$!Aqq;(5V3hXG zJwv_^uA_7(tF;*UQ*%|%z<+&^sZWu4#)*Tjlx6>=P*z}{OH(I~SO@oLX3(Jxim9(d z8LAG<7WICSjrze@-j-2OSzw3f;oPmo7Yg6mS;4bdR2UVj+Nz%MLSem!uZcGk?7$0! zWWwFD?^;2op`Smf!WnoT6V3y_y+hVUW>i`zz~T-s75-k5j8zY0RUH^n`MuIJAU1Ie zEDB(*0~z81PJCy8+luS|2XO4Uvx!X7&b{~DH}k&Pyz;egeEkcr z{NKJl`Q1-!u>P3eYByus9^muF-EE`K56yl3d&lT6KLO)?MZE^!8T4-&{|ZJ|)Lz5K ze|PBh!~h#t4;mboth-gC0}fjNm&fYT?sHS?J z)1FbI@>z>Kz`kZkB%(33_l(-RvsM?3;|z+%jP{_B7!$RqZbw2eUYXrKBjNd3tF2F+ zhggs7A0CPFes!NmHR;B#$UVbRvlEvZ_ojw-yXz$lxDgc?G#R&5wp`G)-oXIUqv92h zggWl?e9AmAGRl2LFB#0E>A1Y^K96crE4UF=VmQik+A$s<^B}_KL(%*`%m+2bjEWd) znDuq$yqZvFwOT}#=iD@KIAji{*T+#=FXe&wd4)BjIcm>N8Xnb-;cTmPPmF~|o)0pC zw(;oms;nqJao~H_?kju8n5z~H<{@bFH?d%Jxmp-;zcR*7Lm7vtTw|{D(10SY7L9#% zH7{Xcd>Y5wXHJyp;HY++hBMOB;G(*57j4iP`q|INqo<#Jf@c(;I6(Qh^waaBX#C@c zACJ#`@TxH!MTYC`H8P{>IXf{rBUzv@enbd=&&V3{?i_hk2G}>PGU6GPw3CJ~@|@QQ zy~hrWGUwh)S=$&L0x-l|`fAR{)VtECInDXt*nTg0(4aGHpJ+ZVOLb?L@Qg9~%P$!N ztM)bXdEI?hSScD)*iKL_%^-X= zt!~!9eZ*r2Dm37$tr!zkwKD^E(5C&v-#D{Y7mW3Mv>Gimy4MQp&tiY${fdg52Jm9m zS1GC*NkrHP&-Zq&oQIF~AQ^)h;Tp6&Y%w(NpGN?Q7-ulc+2YM`iZyj9lFm z6(XYTYo%vrt#m$k)^m|{-t5f;j*j5bKY+uGp{uqj!(%7XePY#w(QTbKWysowpBePk zjG^lsXXSkfjvk4ubNd^NejODM5tYt4&s=3rlxq>IcA5FGdvF62zS>G7Vh74K_6+Pj zcu#D)PmEfDA$RR|)WAN3XPfR5Gb-zCsaBa$^)ed_lF+MLKS8yWIeax`WDR?M0Ku$7 z1HPKpsxl_3YG+37tBQ)cHIrD*dDMsdQy{&DXBsKnQT4#+Rx7x3Mb&XbZvYe#(Pv%A zlJ|)?@^4!067MOyBJ0rC2iKmRkB+}x!@FG~8{~&Z7xjaNN8}Yw46s*uIDpwdpn0i98^)*O1tghOWWN;O@fSCt_T$5670y1Up3q|3>yX_CB#q z6dA5}#i)7cp zCtOXsu_q1QhhZ{qn+%Sk=Kbq?9v*2P2_th~F2q{TjEr($(fmGWFprUHW&SRzK6hfQ zfHNHBIY9t+$UNo2>s~kdY7rI2=2)W9W>iG+{ImljX)rokV)RAki6L-fID5fBbTl8= ztDtB39hqZn$pgnd%GrcpYvk&3bhSg~yQqkwhO#>%aAPF0qU>L=e0-j#zhumH4wR2u 
zbJ}SXM%)<^8(H5Hl|%zZx+hjf(eR3TR6LqSr>AqU&s?$|o;6VsRTsuV!x`ym;87`~ zR5$LTLp4%V(Bj(iB&RXPgD?=gyDpzE$k#-rYx-B$nw|g(;ikL><#1L^2p;xPGA>C(TLcAat*JYM}=L*U`2JG7z~_?H{9^mG;{1+ zo{)hx_+mvxR2Ph@ccQ_!oEXES3RFw;0bfm`>NTo}1gOjgqkHC+F;P`JX~4~$IBRvm zNHbXVBElgG-J`<#lj`dF2o!hbsp`V<6_ui@kwk=z__(*jXD>axXFD3xBPM!m@ULL> zHN#gFvGfxIerP}^#%#akJ}xn$*XYp>44_vGV$%$E z{+*&q8eUyR@cT+b$E*=1MmG&cb+4klZmJ_1K+||ddUwI#^<8zHvD5IrB4e88BI|tE zn>!pm4<6;yHZx=Bs>M~l%kbD;!`Vu?(jY@tEk@Ej$XRK~oxxC9kN9Bdonh2GA_tr| z>%my&K~!iA{za3hy4@3pYDDb#`>11#r3TTF6MQvgVDEuBmw)qmucG9x4fL$AGO!Pk zv-Unw*ow*#LZf@;nNjslG`15|OA*3Xlc=_=QRg`{E*TS5wX=pjKL+QcqN2JxjguLa zfA40^w1#IIDO>JI(>#IEy}DWBqzXe6z@YWCw(}~2FWuws7!^IA>DH$WapNOBizC*g z_8fe(ZW=G?--3qEqiDeQx01%dfl*~7ulVml(XbIJBUVmJwOZ!^qGUcJiQgM1QS}tn zlZZe5Ps#JlnMeFGg@2KUX#D*7zu(!m?X9W75pLRB-~8atAKvNW<2u5TZ{)FH3}wHL z-RAc{r@o^3?bX$BD|^T2$`Z!#h%)04l@g9oku4)U{o;?AhmB@%93m>Yk~Nl$ZVaLi z437WKAAgEEGRJ9$S`E+XoH4tbW3k6*@K@^tSG-ofCWvjOg|}f<+DM{6!wF!H9sAT222V528JG`o2~~!}7!6W|7BGje4h^ z@8BJ;nNL)G8hkW<|GRI0UPsCd-e2cfnSI9i^}F}){Oq3}T2#zKSn@A(KloVg)@>?S zm*9+|GMn#ct{D1TBd>|fK4^?NAB^hA-ZSF4-Hgc*+SU5tTytz5{ml^l-MwT}^YtMt zGKMm5(Euut@Q-!dlJ$GJJ{p~}m$mA1KBC*3Yb@s@F*Fj(e(<=|*i<9e$KQ$Fv|oJh z!}gs&e`2 z`jhn`QE4O@sUw^}Yrz=auj*?bFMC(=s8y7#F~ZGy&Qnx57p+yTAqF>-b>6%?GU~QG z{0SgA$*f(?$3$u78e!IxfVzg|cb{YIR~m8WK|XoC=U&k9`AvZ26GN`+^h8w^If}dwP)yBE2*eNQ}Vdl zku~4{)HSi#$%sbn24?M1p6yHOolMXY;rSk}#GZF&r+L)aIXB*0yF8Es&AVM=uQ{FM z@i^^khwb=qq*m;G;ZNf)?oh`S)=Kt?@M+oCGhocQ9YFZg$9$rwH!U=Xv@61bFM8=b zSRMcE@`#QB!z1#Mz<`Cu6V;G8PrkDhZH$tKcZk%A5w#huvO>e+&9RG~oi7^O>*M~j z&%JF?jX5rV-*o;?#5uFhJbwGqZ~T|yFmzMYf&n6<;`8-|=N08~bKEn=U%u7-R~$9c z=Zpc3lt)HoHp9UOwvVbXn&mZ!blYcM7-P+-c@##QdAWi{6)T5h;^Uo2RQB;_PYmlN zL76Y?be`9QQTmEw(y|tPSeI|sYTh%3b~vxZ_)UGB>iEE-6Bm(? 
zo^<$lPQ$Q(fno}{SNCj5z43|5vx`i_QXj&CV34<^H>>L|A?wtg$I#tl~E(%>^a-; zof_Ql{_S7?d0z8~{qC^c4USeTk~$gbd5){O3p1N=wLL?3m^yBLGC2MiRo-W1c4p+L z7CE(&2Z;Ug{bzr~8I&t&2QYxdInM=9vqP6O9aB04{;s-UaXt>u4`bz=05#j+sb$uh z>F{Vi;2j@WopVKcFEO$dRVSW|WB2)8jSOcCSiTO7Hc^bo*~aLLsDe?oje*0y6Z5Lg zH6&D7%f1?{K%IX@^SjQuR@3J>S2{EDo*{RPvOUVoIQCMkkyiSuD=Vs-?(GrPm@5qE zVC*6R$|I2(w2{D|b!J}g?cNo6zUjldd>`f`&%s&m)~7+fJ9|%=hDsdgV|aaP4GoUN zVSRw1nb(RKqmxmi%8_{A?xU*Ds9dX@Q`V>qj_04u1_Ke%R(bf^CevqA!19F|OnZiW zx3dmus<~s}2_yefCI2#ps^b5b;rQp7&tEKcr7qSvq}W$w*M}&y+Yd9?0f*H*9L=a| jPKq{0{T($t{u*InlwF@>+o(}{!5Dp?M~z+TWh?5x>ct&S literal 0 HcmV?d00001 diff --git a/src/DEM/API.h b/src/DEM/API.h index 0a2aa32c..3ba4c82e 100644 --- a/src/DEM/API.h +++ b/src/DEM/API.h @@ -1905,6 +1905,8 @@ class DEMSolver { std::vector m_input_mesh_obj_xyz; std::vector m_input_mesh_obj_rot; std::vector m_input_mesh_obj_family; + std::vector m_input_mesh_obj_convex; + std::vector m_input_mesh_obj_never_winner; // Processed unique family prescription info std::vector m_unique_family_prescription; @@ -1940,6 +1942,10 @@ class DEMSolver { std::vector m_mesh_facet_owner; // Patch ID for each mesh facet, flattened std::vector m_mesh_facet_patch; + // Per-facet edge neighbors (global triangle indices, NULL_BODYID if boundary) + std::vector m_mesh_facet_neighbor1; + std::vector m_mesh_facet_neighbor2; + std::vector m_mesh_facet_neighbor3; // Three nodes of each triangle, flattened std::vector m_mesh_facets; diff --git a/src/DEM/APIPrivate.cpp b/src/DEM/APIPrivate.cpp index 5148f130..531c860f 100644 --- a/src/DEM/APIPrivate.cpp +++ b/src/DEM/APIPrivate.cpp @@ -13,13 +13,141 @@ #include #include #include +#include +#include #include #include #include #include +#include +#include namespace deme { +namespace { + +struct EdgeInfo { + size_t tri = 0; + int edge = 0; +}; + +struct QuantKey3 { + int64_t x, y, z; + bool operator==(const QuantKey3& o) const noexcept { return x == o.x && y == o.y && z == o.z; } +}; +struct QuantKey3Hash { + size_t 
operator()(const QuantKey3& k) const noexcept { + size_t h1 = std::hash{}(k.x); + size_t h2 = std::hash{}(k.y); + size_t h3 = std::hash{}(k.z); + size_t h = h1; + h ^= h2 + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + h ^= h3 + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + return h; + } +}; + +inline uint64_t makeEdgeKey(int a, int b) { + const uint32_t lo = static_cast(std::min(a, b)); + const uint32_t hi = static_cast(std::max(a, b)); + return (static_cast(lo) << 32) | static_cast(hi); +} + +static inline int64_t quantize(double v, double eps) { + return static_cast(std::llround(v / eps)); +} + +std::vector> buildTriangleEdgeNeighbors(const std::vector& face_v_indices, + const std::vector& vertices) { + const size_t n_faces = face_v_indices.size(); + std::vector> neighbors(n_faces, {NULL_BODYID, NULL_BODYID, NULL_BODYID}); + if (n_faces == 0) { + return neighbors; + } + + std::vector canon; + if (!vertices.empty()) { + double minx = vertices[0].x, miny = vertices[0].y, minz = vertices[0].z; + double maxx = minx, maxy = miny, maxz = minz; + for (const auto& v : vertices) { + minx = std::min(minx, (double)v.x); + miny = std::min(miny, (double)v.y); + minz = std::min(minz, (double)v.z); + maxx = std::max(maxx, (double)v.x); + maxy = std::max(maxy, (double)v.y); + maxz = std::max(maxz, (double)v.z); + } + const double dx = maxx - minx, dy = maxy - miny, dz = maxz - minz; + const double diag = std::sqrt(dx * dx + dy * dy + dz * dz); + const double eps = std::max(diag * 1e-9, 1e-12); + + std::unordered_map rep; + rep.reserve(vertices.size()); + canon.assign(vertices.size(), static_cast(-1)); + size_t next_id = 0; + for (size_t i = 0; i < vertices.size(); ++i) { + const auto& v = vertices[i]; + QuantKey3 key{quantize(v.x, eps), quantize(v.y, eps), quantize(v.z, eps)}; + auto it = rep.find(key); + if (it == rep.end()) { + rep.emplace(key, next_id); + canon[i] = next_id; + next_id++; + } else { + canon[i] = it->second; + } + } + } + + std::unordered_map> edge_map; + 
edge_map.reserve(n_faces * 3); + + for (size_t i = 0; i < n_faces; ++i) { + const int3& face = face_v_indices[i]; + const int v0_raw = face.x; + const int v1_raw = face.y; + const int v2_raw = face.z; + if (v0_raw < 0 || v1_raw < 0 || v2_raw < 0) { + continue; + } + int v0 = v0_raw; + int v1 = v1_raw; + int v2 = v2_raw; + if (!canon.empty()) { + if (static_cast(v0_raw) >= canon.size() || static_cast(v1_raw) >= canon.size() || + static_cast(v2_raw) >= canon.size()) { + continue; + } + v0 = static_cast(canon[static_cast(v0_raw)]); + v1 = static_cast(canon[static_cast(v1_raw)]); + v2 = static_cast(canon[static_cast(v2_raw)]); + } + if (v0 == v1 || v1 == v2 || v2 == v0) { + continue; + } + const uint64_t e0 = makeEdgeKey(v0, v1); + const uint64_t e1 = makeEdgeKey(v1, v2); + const uint64_t e2 = makeEdgeKey(v2, v0); + edge_map[e0].push_back(EdgeInfo{i, 0}); + edge_map[e1].push_back(EdgeInfo{i, 1}); + edge_map[e2].push_back(EdgeInfo{i, 2}); + } + + for (const auto& entry : edge_map) { + const auto& info = entry.second; + if (info.size() == 2) { + const EdgeInfo& a = info[0]; + const EdgeInfo& b = info[1]; + neighbors[a.tri][a.edge] = static_cast(b.tri); + neighbors[b.tri][b.edge] = static_cast(a.tri); + } + } + + return neighbors; +} + +} // namespace + void DEMSolver::assertSysInit(const std::string& method_name) { if (!sys_initialized) { DEME_ERROR("DEMSolver's method %s can only be called after calling Initialize()", method_name.c_str()); @@ -840,17 +968,38 @@ void DEMSolver::preprocessTriangleObjs() { m_input_mesh_obj_xyz.push_back(mesh_obj->init_pos); m_input_mesh_obj_rot.push_back(mesh_obj->init_oriQ); m_input_mesh_obj_family.push_back(mesh_obj->family_code); + m_input_mesh_obj_convex.push_back(mesh_obj->is_convex ? 1 : 0); + m_input_mesh_obj_never_winner.push_back(mesh_obj->never_winner ? 
1 : 0); m_mesh_facet_owner.insert(m_mesh_facet_owner.end(), mesh_obj->GetNumTriangles(), thisMeshObj); - // Initialize patch IDs if not already set (default: all facets in patch 0) - if (!mesh_obj->patches_explicitly_set && mesh_obj->m_patch_ids.empty()) { - mesh_obj->SetPatchIDs({0}); + const bodyID_t tri_offset = static_cast(m_mesh_facets.size()); + const auto local_neighbors = buildTriangleEdgeNeighbors(mesh_obj->m_face_v_indices, mesh_obj->m_vertices); + + // Force single-patch semantics: one patch per mesh (all facets in patch 0) + if (mesh_obj->patches_explicitly_set || mesh_obj->GetNumPatches() > 1) { + DEME_WARNING( + "Mesh patch IDs were provided or computed, but single-patch mode is enabled; all facets will be " + "assigned to one patch."); + } + if (mesh_obj->GetNumTriangles() > 0) { + mesh_obj->m_patch_ids.assign(mesh_obj->GetNumTriangles(), 0); + } else { + mesh_obj->m_patch_ids.clear(); + } + mesh_obj->nPatches = 1; + mesh_obj->patches_explicitly_set = true; + mesh_obj->m_patch_locations.clear(); + mesh_obj->patch_locations_explicitly_set = false; + if (mesh_obj->materials.size() != 1 && !mesh_obj->materials.empty()) { + auto mat = mesh_obj->materials[0]; + mesh_obj->materials.assign(1, mat); + DEME_WARNING("Mesh provided multiple patch materials; single-patch mode keeps only the first material."); } // Populate patch owner and material arrays (one entry per patch in this mesh) // Note patch_id in a mesh is always 0-based, and contiguous std::vector patch_materials(mesh_obj->GetNumPatches()); - for (size_t facet_idx = 0; facet_idx < mesh_obj->GetNumPatches(); facet_idx++) { + for (size_t facet_idx = 0; facet_idx < mesh_obj->GetNumTriangles(); facet_idx++) { // patch_id is per-triangle bodyID_t patch_id = mesh_obj->m_patch_ids.at(facet_idx); // Assign this facet's material to its patch (will overwrite for each facet, but they should be consistent @@ -886,6 +1035,11 @@ void DEMSolver::preprocessTriangleObjs() { } } m_mesh_facets.push_back(tri); + + 
const auto& nb = local_neighbors[i]; + m_mesh_facet_neighbor1.push_back(nb[0] == NULL_BODYID ? NULL_BODYID : nb[0] + tri_offset); + m_mesh_facet_neighbor2.push_back(nb[1] == NULL_BODYID ? NULL_BODYID : nb[1] + tri_offset); + m_mesh_facet_neighbor3.push_back(nb[2] == NULL_BODYID ? NULL_BODYID : nb[2] + tri_offset); } thisLoadPatchCount += mesh_obj->GetNumPatches(); @@ -1345,8 +1499,10 @@ void DEMSolver::initializeGPUArrays() { // Analytical objects' initial stats m_input_ext_obj_xyz, m_input_ext_obj_rot, m_input_ext_obj_family, // Meshed objects' initial stats - cached_mesh_objs, m_input_mesh_obj_xyz, m_input_mesh_obj_rot, m_input_mesh_obj_family, m_mesh_facet_owner, - m_mesh_facet_patch, m_mesh_facets, m_mesh_patch_owner, m_mesh_patch_materials, + cached_mesh_objs, m_input_mesh_obj_xyz, m_input_mesh_obj_rot, m_input_mesh_obj_family, + m_input_mesh_obj_convex, m_input_mesh_obj_never_winner, m_mesh_facet_owner, m_mesh_facet_patch, + m_mesh_facet_neighbor1, m_mesh_facet_neighbor2, m_mesh_facet_neighbor3, m_mesh_facets, m_mesh_patch_owner, + m_mesh_patch_materials, // Clump template name mapping m_template_number_name_map, // Clump template info (mass, sphere components, materials etc.) 
@@ -1368,7 +1524,8 @@ void DEMSolver::initializeGPUArrays() { // Analytical objects' initial stats m_input_ext_obj_family, // Meshed objects' initial stats - m_input_mesh_obj_family, m_mesh_facet_owner, m_mesh_facet_patch, m_mesh_facets, + m_input_mesh_obj_family, m_input_mesh_obj_convex, m_input_mesh_obj_never_winner, m_mesh_facet_owner, + m_mesh_facet_patch, m_mesh_facet_neighbor1, m_mesh_facet_neighbor2, m_mesh_facet_neighbor3, m_mesh_facets, // Analytical obj physics properties m_ext_obj_comp_num, // Family mask @@ -1398,8 +1555,10 @@ void DEMSolver::updateClumpMeshArrays(size_t nOwners, // Analytical objects' initial stats m_input_ext_obj_xyz, m_input_ext_obj_rot, m_input_ext_obj_family, // Meshed objects' initial stats - cached_mesh_objs, m_input_mesh_obj_xyz, m_input_mesh_obj_rot, m_input_mesh_obj_family, m_mesh_facet_owner, - m_mesh_facet_patch, m_mesh_facets, m_mesh_patch_owner, m_mesh_patch_materials, + cached_mesh_objs, m_input_mesh_obj_xyz, m_input_mesh_obj_rot, m_input_mesh_obj_family, + m_input_mesh_obj_convex, m_input_mesh_obj_never_winner, m_mesh_facet_owner, m_mesh_facet_patch, + m_mesh_facet_neighbor1, m_mesh_facet_neighbor2, m_mesh_facet_neighbor3, m_mesh_facets, m_mesh_patch_owner, + m_mesh_patch_materials, // Clump template info (mass, sphere components, materials etc.) 
flattened_clump_templates, // Analytical obj physics properties @@ -1420,7 +1579,8 @@ void DEMSolver::updateClumpMeshArrays(size_t nOwners, // Analytical objects' initial stats m_input_ext_obj_family, // Meshed objects' initial stats - m_input_mesh_obj_family, m_mesh_facet_owner, m_mesh_facet_patch, m_mesh_facets, + m_input_mesh_obj_family, m_input_mesh_obj_convex, m_input_mesh_obj_never_winner, m_mesh_facet_owner, + m_mesh_facet_patch, m_mesh_facet_neighbor1, m_mesh_facet_neighbor2, m_mesh_facet_neighbor3, m_mesh_facets, // Analytical obj physics properties m_ext_obj_comp_num, // Family mask diff --git a/src/DEM/APIPublic.cpp b/src/DEM/APIPublic.cpp index de4d0b8c..e79e8ee0 100644 --- a/src/DEM/APIPublic.cpp +++ b/src/DEM/APIPublic.cpp @@ -2389,6 +2389,8 @@ void DEMSolver::ReleaseFlattenedArrays() { deallocate_array(m_input_mesh_obj_xyz); deallocate_array(m_input_mesh_obj_rot); deallocate_array(m_input_mesh_obj_family); + deallocate_array(m_input_mesh_obj_convex); + deallocate_array(m_input_mesh_obj_never_winner); deallocate_array(m_unique_family_prescription); deallocate_array(m_input_clump_family); @@ -2404,6 +2406,9 @@ void DEMSolver::ReleaseFlattenedArrays() { deallocate_array(m_mesh_facet_owner); deallocate_array(m_mesh_facet_patch); + deallocate_array(m_mesh_facet_neighbor1); + deallocate_array(m_mesh_facet_neighbor2); + deallocate_array(m_mesh_facet_neighbor3); deallocate_array(m_mesh_facets); deallocate_array(m_mesh_patch_owner); deallocate_array(m_mesh_patch_materials); diff --git a/src/DEM/BdrsAndObjs.h b/src/DEM/BdrsAndObjs.h index 68b2bfb0..6a848f5e 100644 --- a/src/DEM/BdrsAndObjs.h +++ b/src/DEM/BdrsAndObjs.h @@ -370,6 +370,10 @@ class DEMMesh : public DEMInitializer { // If true, when the mesh is initialized into the system, it will re-order the nodes of each triangle so that the // normals derived from right-hand-rule are the same as the normals in the mesh file bool use_mesh_normals = false; + // If true, this mesh is treated as convex for contact 
island reduction. + bool is_convex = false; + // If true, this mesh is never selected as the winner side for island labeling. + bool never_winner = false; DEMMesh() { obj_type = OWNER_TYPE::MESH; } DEMMesh(std::string input_file) { @@ -407,6 +411,14 @@ class DEMMesh : public DEMInitializer { /// Instruct that when the mesh is initialized into the system, it will re-order the nodes of each triangle so that /// the normals derived from right-hand-rule are the same as the normals in the mesh file void UseNormals(bool use = true) { use_mesh_normals = use; } + /// Mark this mesh as convex for contact reduction purposes. + void SetConvex(bool convex = true) { is_convex = convex; } + /// Query whether this mesh is marked convex. + bool IsConvex() const { return is_convex; } + /// Prevent this mesh from ever being chosen as the winner side in island labeling. + void SetNeverWinner(bool never = true) { never_winner = never; } + /// Query whether this mesh is marked as never-winner. + bool IsNeverWinner() const { return never_winner; } /// Access the n-th triangle in mesh DEMTriangle GetTriangle(size_t index) const { // No need to wrap (for Shlok) diff --git a/src/DEM/Defines.h b/src/DEM/Defines.h index d8dfe9f2..a9c7f799 100644 --- a/src/DEM/Defines.h +++ b/src/DEM/Defines.h @@ -363,6 +363,7 @@ struct DEMDataDT { bodyID_t* idPatchA; bodyID_t* idPatchB; contact_t* contactTypePatch; + bodyID_t* contactPatchIsland; contactPairs_t* contactMapping; // Family mask @@ -388,7 +389,12 @@ struct DEMDataDT { bodyID_t* ownerTriMesh; bodyID_t* ownerPatchMesh; bodyID_t* ownerAnalBody; + notStupidBool_t* ownerMeshConvex; + notStupidBool_t* ownerMeshNeverWinner; bodyID_t* triPatchID; + bodyID_t* triNeighbor1; + bodyID_t* triNeighbor2; + bodyID_t* triNeighbor3; float3* relPosNode1; float3* relPosNode2; float3* relPosNode3; @@ -464,7 +470,12 @@ struct DEMDataKT { clumpComponentOffsetExt_t* clumpComponentOffsetExt; bodyID_t* ownerTriMesh; bodyID_t* ownerAnalBody; + notStupidBool_t* 
ownerMeshConvex; + notStupidBool_t* ownerMeshNeverWinner; bodyID_t* triPatchID; + bodyID_t* triNeighbor1; + bodyID_t* triNeighbor2; + bodyID_t* triNeighbor3; float3* relPosNode1; float3* relPosNode2; float3* relPosNode3; @@ -486,6 +497,8 @@ struct DEMDataKT { bodyID_t* previous_idPatchB; contact_t* contactTypePatch; contact_t* previous_contactTypePatch; + bodyID_t* contactPatchIsland; + bodyID_t* previous_contactPatchIsland; contactPairs_t* geomToPatchMap; // data pointers that is kT's transfer destination @@ -500,6 +513,7 @@ struct DEMDataKT { bodyID_t* pDTOwnedBuffer_idPatchA = nullptr; bodyID_t* pDTOwnedBuffer_idPatchB = nullptr; contact_t* pDTOwnedBuffer_contactTypePatch = nullptr; + bodyID_t* pDTOwnedBuffer_contactPatchIsland = nullptr; contactPairs_t* pDTOwnedBuffer_geomToPatchMap = nullptr; // The collection of pointers to DEM template arrays such as radiiSphere, still useful when there are template info diff --git a/src/DEM/dT.cpp b/src/DEM/dT.cpp index 4163ee8b..9ad0a201 100644 --- a/src/DEM/dT.cpp +++ b/src/DEM/dT.cpp @@ -82,6 +82,7 @@ void DEMDynamicThread::packDataPointers() { idPatchA.bindDevicePointer(&(granData->idPatchA)); idPatchB.bindDevicePointer(&(granData->idPatchB)); contactTypePatch.bindDevicePointer(&(granData->contactTypePatch)); + contactPatchIsland.bindDevicePointer(&(granData->contactPatchIsland)); familyMaskMatrix.bindDevicePointer(&(granData->familyMasks)); familyExtraMarginSize.bindDevicePointer(&(granData->familyExtraMarginSize)); @@ -115,8 +116,13 @@ void DEMDynamicThread::packDataPointers() { // Mesh and analytical-related ownerTriMesh.bindDevicePointer(&(granData->ownerTriMesh)); + ownerMeshConvex.bindDevicePointer(&(granData->ownerMeshConvex)); + ownerMeshNeverWinner.bindDevicePointer(&(granData->ownerMeshNeverWinner)); ownerPatchMesh.bindDevicePointer(&(granData->ownerPatchMesh)); triPatchID.bindDevicePointer(&(granData->triPatchID)); + triNeighbor1.bindDevicePointer(&(granData->triNeighbor1)); + 
triNeighbor2.bindDevicePointer(&(granData->triNeighbor2)); + triNeighbor3.bindDevicePointer(&(granData->triNeighbor3)); ownerAnalBody.bindDevicePointer(&(granData->ownerAnalBody)); relPosNode1.bindDevicePointer(&(granData->relPosNode1)); relPosNode2.bindDevicePointer(&(granData->relPosNode2)); @@ -245,6 +251,7 @@ void DEMDynamicThread::migrateDataToDevice() { contactTypePatch.toDeviceAsync(streamInfo.stream); idPatchA.toDeviceAsync(streamInfo.stream); idPatchB.toDeviceAsync(streamInfo.stream); + contactPatchIsland.toDeviceAsync(streamInfo.stream); familyMaskMatrix.toDeviceAsync(streamInfo.stream); familyExtraMarginSize.toDeviceAsync(streamInfo.stream); @@ -273,8 +280,13 @@ void DEMDynamicThread::migrateDataToDevice() { volumeOwnerBody.toDeviceAsync(streamInfo.stream); ownerTriMesh.toDeviceAsync(streamInfo.stream); + ownerMeshConvex.toDeviceAsync(streamInfo.stream); + ownerMeshNeverWinner.toDeviceAsync(streamInfo.stream); ownerPatchMesh.toDeviceAsync(streamInfo.stream); triPatchID.toDeviceAsync(streamInfo.stream); + triNeighbor1.toDeviceAsync(streamInfo.stream); + triNeighbor2.toDeviceAsync(streamInfo.stream); + triNeighbor3.toDeviceAsync(streamInfo.stream); ownerAnalBody.toDeviceAsync(streamInfo.stream); relPosNode1.toDeviceAsync(streamInfo.stream); relPosNode2.toDeviceAsync(streamInfo.stream); @@ -343,6 +355,7 @@ void DEMDynamicThread::migrateContactInfoToHost() { contactTypePatch.toHost(); idPatchA.toHost(); idPatchB.toHost(); + contactPatchIsland.toHost(); // Contact results contactForces.toHost(); @@ -599,6 +612,8 @@ void DEMDynamicThread::allocateGPUArrays(size_t nOwnerBodies, DEME_DUAL_ARRAY_RESIZE(alphaZ, nOwnerBodies, 0); DEME_DUAL_ARRAY_RESIZE(accSpecified, nOwnerBodies, 0); DEME_DUAL_ARRAY_RESIZE(angAccSpecified, nOwnerBodies, 0); + DEME_DUAL_ARRAY_RESIZE(ownerMeshConvex, nOwnerBodies, 0); + DEME_DUAL_ARRAY_RESIZE(ownerMeshNeverWinner, nOwnerBodies, 0); // Resize the family mask `matrix' (in fact it is flattened) DEME_DUAL_ARRAY_RESIZE(familyMaskMatrix, 
(NUM_AVAL_FAMILIES + 1) * NUM_AVAL_FAMILIES / 2, DONT_PREVENT_CONTACT); @@ -630,6 +645,9 @@ void DEMDynamicThread::allocateGPUArrays(size_t nOwnerBodies, DEME_DUAL_ARRAY_RESIZE(relPosNode2, nTriGM, make_float3(0)); DEME_DUAL_ARRAY_RESIZE(relPosNode3, nTriGM, make_float3(0)); DEME_DUAL_ARRAY_RESIZE(triPatchID, nTriGM, 0); + DEME_DUAL_ARRAY_RESIZE(triNeighbor1, nTriGM, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighbor2, nTriGM, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighbor3, nTriGM, NULL_BODYID); // Resize to the number of mesh patches DEME_DUAL_ARRAY_RESIZE(ownerPatchMesh, nMeshPatches, 0); @@ -797,8 +815,13 @@ void DEMDynamicThread::populateEntityArrays(const std::vector& input_mesh_obj_xyz, const std::vector& input_mesh_obj_rot, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& mesh_facet_owner, const std::vector& mesh_facet_patch, + const std::vector& mesh_facet_neighbor1, + const std::vector& mesh_facet_neighbor2, + const std::vector& mesh_facet_neighbor3, const std::vector& mesh_facets, const std::vector& mesh_patch_owner, const std::vector& mesh_patch_materials, @@ -1192,14 +1215,20 @@ void DEMDynamicThread::populateEntityArrays(const std::vectorClearWildcards(); @@ -1289,8 +1318,13 @@ void DEMDynamicThread::initGPUArrays(const std::vector& input_mesh_obj_xyz, const std::vector& input_mesh_obj_rot, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& mesh_facet_owner, const std::vector& mesh_facet_patch, + const std::vector& mesh_facet_neighbor1, + const std::vector& mesh_facet_neighbor2, + const std::vector& mesh_facet_neighbor3, const std::vector& mesh_facets, const std::vector& mesh_patch_owner, const std::vector& mesh_patch_materials, @@ -1318,9 +1352,10 @@ void DEMDynamicThread::initGPUArrays(const std::vector& input_mesh_obj_xyz, const 
std::vector& input_mesh_obj_rot, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& mesh_facet_owner, const std::vector& mesh_facet_patch, + const std::vector& mesh_facet_neighbor1, + const std::vector& mesh_facet_neighbor2, + const std::vector& mesh_facet_neighbor3, const std::vector& mesh_facets, const std::vector& mesh_patch_owner, const std::vector& mesh_patch_materials, @@ -1366,10 +1406,11 @@ void DEMDynamicThread::updateClumpMeshArrays(const std::vectoridPatchA, idPatchA_buffer[read_idx].data(), nPatch * sizeof(bodyID_t)); xu.add(granData->idPatchB, idPatchB_buffer[read_idx].data(), nPatch * sizeof(bodyID_t)); xu.add(granData->contactTypePatch, contactTypePatch_buffer[read_idx].data(), nPatch * sizeof(contact_t)); + xu.add(granData->contactPatchIsland, contactPatchIsland_buffer[read_idx].data(), nPatch * sizeof(bodyID_t)); } if (!solverFlags.isHistoryless) { @@ -2593,6 +2637,7 @@ inline void DEMDynamicThread::unpackMyBuffer() { kT->granData->pDTOwnedBuffer_idPatchA = idPatchA_buffer[kt_write_buf].data(); kT->granData->pDTOwnedBuffer_idPatchB = idPatchB_buffer[kt_write_buf].data(); kT->granData->pDTOwnedBuffer_contactTypePatch = contactTypePatch_buffer[kt_write_buf].data(); + kT->granData->pDTOwnedBuffer_contactPatchIsland = contactPatchIsland_buffer[kt_write_buf].data(); if (!solverFlags.isHistoryless) { kT->granData->pDTOwnedBuffer_contactMapping = contactMapping_buffer[kt_write_buf].data(); } @@ -3185,21 +3230,31 @@ inline void DEMDynamicThread::unpack_impl() { // entry.second.second); // } - // Now for patch-based contacts, we do the same thing. Note the unique types herein will be the same as thosein. - cubRunLengthEncode(granData->contactTypePatch, existingContactTypes.device(), typeCounts, + // Now for patch-based contacts, we do the same thing. Keep primitive type list intact. 
+ contact_t* patchTypesDevice = (contact_t*)solverScratchSpace.allocateTempVector( + "patchContactTypes", (NUM_SUPPORTED_CONTACT_TYPES + 1) * sizeof(contact_t)); + cubRunLengthEncode(granData->contactTypePatch, patchTypesDevice, typeCounts, solverScratchSpace.getDualStructDevice("numExistingTypes"), *solverScratchSpace.numContacts, streamInfo.stream, solverScratchSpace); - cubPrefixScan(typeCounts, typeStartOffsetsPatch.device(), m_numExistingTypes, + solverScratchSpace.syncDualStructDeviceToHost("numExistingTypes"); + size_t numPatchTypes = *solverScratchSpace.getDualStructHost("numExistingTypes"); + cubPrefixScan(typeCounts, typeStartOffsetsPatch.device(), numPatchTypes, streamInfo.stream, solverScratchSpace); typeStartOffsetsPatch.toHost(); + std::vector patchTypesHost(numPatchTypes); + if (numPatchTypes > 0) { + DEME_GPU_CALL(cudaMemcpy(patchTypesHost.data(), patchTypesDevice, numPatchTypes * sizeof(contact_t), + cudaMemcpyDeviceToHost)); + } typeStartCountPatchMap.SetAll({0, 0}); - for (size_t i = 0; i < m_numExistingTypes; i++) { - typeStartCountPatchMap[existingContactTypes[i]] = std::make_pair( - typeStartOffsetsPatch[i], (i + 1 < m_numExistingTypes ? typeStartOffsetsPatch[i + 1] - : (contactPairs_t)*solverScratchSpace.numContacts) - + for (size_t i = 0; i < numPatchTypes; i++) { + typeStartCountPatchMap[patchTypesHost[i]] = std::make_pair( + typeStartOffsetsPatch[i], (i + 1 < numPatchTypes ? 
typeStartOffsetsPatch[i + 1] + : (contactPairs_t)*solverScratchSpace.numContacts) - typeStartOffsetsPatch[i]); } + solverScratchSpace.finishUsingTempVector("patchContactTypes"); solverScratchSpace.finishUsingTempVector("typeCounts"); solverScratchSpace.finishUsingDualStruct("numExistingTypes"); diff --git a/src/DEM/dT.h b/src/DEM/dT.h index e3ffbe50..4406b56b 100644 --- a/src/DEM/dT.h +++ b/src/DEM/dT.h @@ -263,6 +263,8 @@ class DEMDynamicThread { DeviceArray(&m_approxDeviceBytesUsed)}; DeviceArray contactTypePatch_buffer[2] = {DeviceArray(&m_approxDeviceBytesUsed), DeviceArray(&m_approxDeviceBytesUsed)}; + DeviceArray contactPatchIsland_buffer[2] = {DeviceArray(&m_approxDeviceBytesUsed), + DeviceArray(&m_approxDeviceBytesUsed)}; DeviceArray geomToPatchMap_buffer[2] = {DeviceArray(&m_approxDeviceBytesUsed), DeviceArray(&m_approxDeviceBytesUsed)}; DeviceArray contactMapping_buffer[2] = {DeviceArray(&m_approxDeviceBytesUsed), @@ -396,6 +398,7 @@ class DEMDynamicThread { DualArray idPatchA = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray idPatchB = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray contactTypePatch = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + DualArray contactPatchIsland = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray geomToPatchMap = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); @@ -456,9 +459,18 @@ class DEMDynamicThread { DualArray ownerClumpBody = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray ownerTriMesh = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray ownerAnalBody = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + // Mesh owner flags (indexed by owner body ID) + DualArray ownerMeshConvex = + DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + DualArray ownerMeshNeverWinner = + DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); // Mesh patch information: each 
facet belongs to a patch, and each patch has material properties // Patch ID for each triangle facet (maps facet to patch) DualArray triPatchID = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + // Triangle edge neighbors (global triangle indices; NULL_BODYID for boundary) + DualArray triNeighbor1 = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + DualArray triNeighbor2 = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + DualArray triNeighbor3 = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); // Mesh patch owner IDs (one per patch, flattened across all meshes) DualArray ownerPatchMesh = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); @@ -749,8 +761,13 @@ class DEMDynamicThread { const std::vector& input_mesh_obj_xyz, const std::vector& input_mesh_obj_rot, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& mesh_facet_owner, const std::vector& mesh_facet_patch, + const std::vector& mesh_facet_neighbor1, + const std::vector& mesh_facet_neighbor2, + const std::vector& mesh_facet_neighbor3, const std::vector& mesh_facets, const std::vector& mesh_patch_owner, const std::vector& mesh_patch_materials, @@ -784,8 +801,13 @@ class DEMDynamicThread { const std::vector& input_mesh_obj_xyz, const std::vector& input_mesh_obj_rot, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& mesh_facet_owner, const std::vector& mesh_facet_patch, + const std::vector& mesh_facet_neighbor1, + const std::vector& mesh_facet_neighbor2, + const std::vector& mesh_facet_neighbor3, const std::vector& mesh_facets, const std::vector& mesh_patch_owner, const std::vector& mesh_patch_materials, @@ -814,8 +836,13 @@ class DEMDynamicThread { const std::vector& input_mesh_obj_xyz, const std::vector& input_mesh_obj_rot, const std::vector& 
input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& mesh_facet_owner, const std::vector& mesh_facet_patch, + const std::vector& mesh_facet_neighbor1, + const std::vector& mesh_facet_neighbor2, + const std::vector& mesh_facet_neighbor3, const std::vector& mesh_facets, const std::vector& mesh_patch_owner, const std::vector& mesh_patch_materials, diff --git a/src/DEM/kT.cpp b/src/DEM/kT.cpp index ab8c824b..e7c5eb03 100644 --- a/src/DEM/kT.cpp +++ b/src/DEM/kT.cpp @@ -80,6 +80,7 @@ inline void DEMKinematicThread::transferPatchArrayResize(int buffer_idx, size_t DEME_DEVICE_ARRAY_RESIZE(dT->idPatchA_buffer[buffer_idx], nContactPairs); DEME_DEVICE_ARRAY_RESIZE(dT->idPatchB_buffer[buffer_idx], nContactPairs); DEME_DEVICE_ARRAY_RESIZE(dT->contactTypePatch_buffer[buffer_idx], nContactPairs); + DEME_DEVICE_ARRAY_RESIZE(dT->contactPatchIsland_buffer[buffer_idx], nContactPairs); if (!solverFlags.isHistoryless) { DEME_DEVICE_ARRAY_RESIZE(dT->contactMapping_buffer[buffer_idx], nContactPairs); granData->pDTOwnedBuffer_contactMapping = dT->contactMapping_buffer[buffer_idx].data(); @@ -87,6 +88,7 @@ inline void DEMKinematicThread::transferPatchArrayResize(int buffer_idx, size_t granData->pDTOwnedBuffer_idPatchA = dT->idPatchA_buffer[buffer_idx].data(); granData->pDTOwnedBuffer_idPatchB = dT->idPatchB_buffer[buffer_idx].data(); granData->pDTOwnedBuffer_contactTypePatch = dT->contactTypePatch_buffer[buffer_idx].data(); + granData->pDTOwnedBuffer_contactPatchIsland = dT->contactPatchIsland_buffer[buffer_idx].data(); // Unset the device change we just made DEME_GPU_CALL(cudaSetDevice(streamInfo.device)); @@ -378,6 +380,8 @@ inline void DEMKinematicThread::sendToTheirBuffer() { resize_patch = DEME_MAX(resize_patch, idPatchA.size()); resize_patch = DEME_MAX(resize_patch, idPatchB.size()); resize_patch = DEME_MAX(resize_patch, contactTypePatch.size()); + // Keep patch-side buffers in lockstep; missing one 
can corrupt swap/copies and crash kernels. + resize_patch = DEME_MAX(resize_patch, contactPatchIsland.size()); if (!solverFlags.isHistoryless) { resize_patch = DEME_MAX(resize_patch, contactMapping.size()); } @@ -389,7 +393,8 @@ inline void DEMKinematicThread::sendToTheirBuffer() { resize_prim > dT->geomToPatchMap_buffer[write_idx].size(); bool need_resize_patch = resize_patch > dT->idPatchA_buffer[write_idx].size() || resize_patch > dT->idPatchB_buffer[write_idx].size() || - resize_patch > dT->contactTypePatch_buffer[write_idx].size(); + resize_patch > dT->contactTypePatch_buffer[write_idx].size() || + resize_patch > dT->contactPatchIsland_buffer[write_idx].size(); if (!solverFlags.isHistoryless) { need_resize_patch = need_resize_patch || (resize_patch > dT->contactMapping_buffer[write_idx].size()); } @@ -411,6 +416,8 @@ inline void DEMKinematicThread::sendToTheirBuffer() { output_swapped = swap_device_buffer(idPatchA, dT->idPatchA_buffer[write_idx]) && output_swapped; output_swapped = swap_device_buffer(idPatchB, dT->idPatchB_buffer[write_idx]) && output_swapped; output_swapped = swap_device_buffer(contactTypePatch, dT->contactTypePatch_buffer[write_idx]) && output_swapped; + output_swapped = + swap_device_buffer(contactPatchIsland, dT->contactPatchIsland_buffer[write_idx]) && output_swapped; if (!solverFlags.isHistoryless) { output_swapped = swap_device_buffer(contactMapping, dT->contactMapping_buffer[write_idx]) && output_swapped; } @@ -424,6 +431,7 @@ inline void DEMKinematicThread::sendToTheirBuffer() { granData->pDTOwnedBuffer_idPatchA = dT->idPatchA_buffer[write_idx].data(); granData->pDTOwnedBuffer_idPatchB = dT->idPatchB_buffer[write_idx].data(); granData->pDTOwnedBuffer_contactTypePatch = dT->contactTypePatch_buffer[write_idx].data(); + granData->pDTOwnedBuffer_contactPatchIsland = dT->contactPatchIsland_buffer[write_idx].data(); if (!solverFlags.isHistoryless) { granData->pDTOwnedBuffer_contactMapping = dT->contactMapping_buffer[write_idx].data(); } @@ 
-458,6 +466,8 @@ inline void DEMKinematicThread::sendToTheirBuffer() { xs.add(dT->idPatchA_buffer[write_idx].data(), granData->idPatchA, nPatch * sizeof(bodyID_t)); xs.add(dT->idPatchB_buffer[write_idx].data(), granData->idPatchB, nPatch * sizeof(bodyID_t)); xs.add(dT->contactTypePatch_buffer[write_idx].data(), granData->contactTypePatch, nPatch * sizeof(contact_t)); + xs.add(dT->contactPatchIsland_buffer[write_idx].data(), granData->contactPatchIsland, + nPatch * sizeof(bodyID_t)); if (!solverFlags.isHistoryless) { xs.add(dT->contactMapping_buffer[write_idx].data(), granData->contactMapping, nPatch * sizeof(contactPairs_t)); @@ -536,8 +546,8 @@ void DEMKinematicThread::workerThread() { contactTypePrimitive, previous_idPrimitiveA, previous_idPrimitiveB, previous_contactTypePrimitive, contactPersistency, contactMapping, idPatchA, idPatchB, previous_idPatchA, previous_idPatchB, contactTypePatch, previous_contactTypePatch, - typeStartCountPatchMap, geomToPatchMap, streamInfo.stream, solverScratchSpace, timers, - stateParams); + contactPatchIsland, previous_contactPatchIsland, typeStartCountPatchMap, geomToPatchMap, + streamInfo.stream, solverScratchSpace, timers, stateParams); CDAccumTimer.End(); timers.GetTimer("Send to dT buffer").start(); @@ -703,6 +713,8 @@ void DEMKinematicThread::packDataPointers() { previous_idPatchB.bindDevicePointer(&(granData->previous_idPatchB)); contactTypePatch.bindDevicePointer(&(granData->contactTypePatch)); previous_contactTypePatch.bindDevicePointer(&(granData->previous_contactTypePatch)); + contactPatchIsland.bindDevicePointer(&(granData->contactPatchIsland)); + previous_contactPatchIsland.bindDevicePointer(&(granData->previous_contactPatchIsland)); geomToPatchMap.bindDevicePointer(&(granData->geomToPatchMap)); familyMaskMatrix.bindDevicePointer(&(granData->familyMasks)); @@ -716,7 +728,12 @@ void DEMKinematicThread::packDataPointers() { // Mesh-related ownerTriMesh.bindDevicePointer(&(granData->ownerTriMesh)); + 
ownerMeshConvex.bindDevicePointer(&(granData->ownerMeshConvex)); + ownerMeshNeverWinner.bindDevicePointer(&(granData->ownerMeshNeverWinner)); triPatchID.bindDevicePointer(&(granData->triPatchID)); + triNeighbor1.bindDevicePointer(&(granData->triNeighbor1)); + triNeighbor2.bindDevicePointer(&(granData->triNeighbor2)); + triNeighbor3.bindDevicePointer(&(granData->triNeighbor3)); relPosNode1.bindDevicePointer(&(granData->relPosNode1)); relPosNode2.bindDevicePointer(&(granData->relPosNode2)); relPosNode3.bindDevicePointer(&(granData->relPosNode3)); @@ -754,6 +771,8 @@ void DEMKinematicThread::migrateDataToDevice() { previous_idPatchB.toDeviceAsync(streamInfo.stream); contactTypePatch.toDeviceAsync(streamInfo.stream); previous_contactTypePatch.toDeviceAsync(streamInfo.stream); + contactPatchIsland.toDeviceAsync(streamInfo.stream); + previous_contactPatchIsland.toDeviceAsync(streamInfo.stream); familyMaskMatrix.toDeviceAsync(streamInfo.stream); familyExtraMarginSize.toDeviceAsync(streamInfo.stream); @@ -763,7 +782,12 @@ void DEMKinematicThread::migrateDataToDevice() { ownerAnalBody.toDeviceAsync(streamInfo.stream); ownerTriMesh.toDeviceAsync(streamInfo.stream); + ownerMeshConvex.toDeviceAsync(streamInfo.stream); + ownerMeshNeverWinner.toDeviceAsync(streamInfo.stream); triPatchID.toDeviceAsync(streamInfo.stream); + triNeighbor1.toDeviceAsync(streamInfo.stream); + triNeighbor2.toDeviceAsync(streamInfo.stream); + triNeighbor3.toDeviceAsync(streamInfo.stream); relPosNode1.toDeviceAsync(streamInfo.stream); relPosNode2.toDeviceAsync(streamInfo.stream); relPosNode3.toDeviceAsync(streamInfo.stream); @@ -801,6 +825,7 @@ void DEMKinematicThread::packTransferPointers(DEMDynamicThread*& dT) { granData->pDTOwnedBuffer_idPatchA = dT->idPatchA_buffer[write_idx].data(); granData->pDTOwnedBuffer_idPatchB = dT->idPatchB_buffer[write_idx].data(); granData->pDTOwnedBuffer_contactTypePatch = dT->contactTypePatch_buffer[write_idx].data(); + granData->pDTOwnedBuffer_contactPatchIsland = 
dT->contactPatchIsland_buffer[write_idx].data(); granData->pDTOwnedBuffer_contactMapping = dT->contactMapping_buffer[write_idx].data(); } @@ -901,6 +926,8 @@ void DEMKinematicThread::allocateGPUArrays(size_t nOwnerBodies, DEME_DUAL_ARRAY_RESIZE(oriQx, nOwnerBodies, 0); DEME_DUAL_ARRAY_RESIZE(oriQy, nOwnerBodies, 0); DEME_DUAL_ARRAY_RESIZE(oriQz, nOwnerBodies, 0); + DEME_DUAL_ARRAY_RESIZE(ownerMeshConvex, nOwnerBodies, 0); + DEME_DUAL_ARRAY_RESIZE(ownerMeshNeverWinner, nOwnerBodies, 0); DEME_DEVICE_ARRAY_RESIZE(marginSizeSphere, nSpheresGM); DEME_DEVICE_ARRAY_RESIZE(marginSizeAnalytical, nAnalGM); DEME_DEVICE_ARRAY_RESIZE(marginSizeTriangle, nTriGM); @@ -948,6 +975,9 @@ void DEMKinematicThread::allocateGPUArrays(size_t nOwnerBodies, // Resize to the number of triangle facets DEME_DUAL_ARRAY_RESIZE(ownerTriMesh, nTriGM, 0); DEME_DUAL_ARRAY_RESIZE(triPatchID, nTriGM, 0); + DEME_DUAL_ARRAY_RESIZE(triNeighbor1, nTriGM, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighbor2, nTriGM, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighbor3, nTriGM, NULL_BODYID); DEME_DUAL_ARRAY_RESIZE(relPosNode1, nTriGM, make_float3(0)); DEME_DUAL_ARRAY_RESIZE(relPosNode2, nTriGM, make_float3(0)); DEME_DUAL_ARRAY_RESIZE(relPosNode3, nTriGM, make_float3(0)); @@ -986,6 +1016,7 @@ void DEMKinematicThread::allocateGPUArrays(size_t nOwnerBodies, DEME_DUAL_ARRAY_RESIZE(idPatchA, cnt_arr_size, 0); DEME_DUAL_ARRAY_RESIZE(idPatchB, cnt_arr_size, 0); DEME_DUAL_ARRAY_RESIZE(contactTypePatch, cnt_arr_size, NOT_A_CONTACT); + DEME_DUAL_ARRAY_RESIZE(contactPatchIsland, cnt_arr_size, NULL_BODYID); DEME_DUAL_ARRAY_RESIZE(geomToPatchMap, cnt_arr_size, 0); if (!solverFlags.isHistoryless) { @@ -997,6 +1028,7 @@ void DEMKinematicThread::allocateGPUArrays(size_t nOwnerBodies, DEME_DUAL_ARRAY_RESIZE(previous_idPatchA, cnt_arr_size, 0); DEME_DUAL_ARRAY_RESIZE(previous_idPatchB, cnt_arr_size, 0); DEME_DUAL_ARRAY_RESIZE(previous_contactTypePatch, cnt_arr_size, NOT_A_CONTACT); + 
DEME_DUAL_ARRAY_RESIZE(previous_contactPatchIsland, cnt_arr_size, NULL_BODYID); } } } @@ -1010,8 +1042,13 @@ void DEMKinematicThread::registerPolicies(const std::vector& fa void DEMKinematicThread::populateEntityArrays(const std::vector>& input_clump_batches, const std::vector& input_ext_obj_family, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& input_mesh_facet_owner, const std::vector& input_mesh_facet_patch, + const std::vector& input_mesh_facet_neighbor1, + const std::vector& input_mesh_facet_neighbor2, + const std::vector& input_mesh_facet_neighbor3, const std::vector& input_mesh_facets, const ClumpTemplateFlatten& clump_templates, const std::vector& ext_obj_comp_num, @@ -1129,14 +1166,20 @@ void DEMKinematicThread::populateEntityArrays(const std::vector>& input_clump_batches, const std::vector& input_ext_obj_family, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& input_mesh_facet_owner, const std::vector& input_mesh_facet_patch, + const std::vector& input_mesh_facet_neighbor1, + const std::vector& input_mesh_facet_neighbor2, + const std::vector& input_mesh_facet_neighbor3, const std::vector& input_mesh_facets, const std::vector& ext_obj_comp_num, const std::vector& family_mask_matrix, @@ -1156,15 +1204,22 @@ void DEMKinematicThread::initGPUArrays(const std::vector>& input_clump_batches, const std::vector& input_ext_obj_family, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& input_mesh_facet_owner, const std::vector& input_mesh_facet_patch, + const std::vector& input_mesh_facet_neighbor1, + const std::vector& input_mesh_facet_neighbor2, + const std::vector& input_mesh_facet_neighbor3, const std::vector& input_mesh_facets, const std::vector& 
ext_obj_comp_num, const std::vector& family_mask_matrix, @@ -1177,9 +1232,11 @@ void DEMKinematicThread::updateClumpMeshArrays(const std::vector& dT_data, size_t nContacts) { @@ -1187,7 +1244,8 @@ void DEMKinematicThread::updatePrevContactArrays(DualStruct& dT_data, // Note kT never had the responsibility to migrate contact info to host, even at Update, as even in this case // its host-side update comes from dT overwritePrevContactArrays(granData, dT_data, previous_idPatchA, previous_idPatchB, previous_contactTypePatch, - typeStartCountPatchMap, simParams, solverScratchSpace, streamInfo.stream, nContacts); + previous_contactPatchIsland, typeStartCountPatchMap, simParams, solverScratchSpace, + streamInfo.stream, nContacts); DEME_DEBUG_PRINTF("Number of contacts after a user-manual contact load: %zu", nContacts); DEME_DEBUG_PRINTF("Number of spheres after a user-manual contact load: %zu", (size_t)simParams->nSpheresGM); } diff --git a/src/DEM/kT.h b/src/DEM/kT.h index 57800f7e..5620b030 100644 --- a/src/DEM/kT.h +++ b/src/DEM/kT.h @@ -195,10 +195,19 @@ class DEMKinematicThread { DualArray ownerClumpBody = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray ownerTriMesh = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray ownerAnalBody = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + // Mesh owner flags (indexed by owner body ID) + DualArray ownerMeshConvex = + DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + DualArray ownerMeshNeverWinner = + DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); // Mesh patch information: each facet belongs to a patch // Patch ID for each triangle facet (maps facet to patch) DualArray triPatchID = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + // Triangle edge neighbors (global triangle indices; NULL_BODYID for boundary) + DualArray triNeighbor1 = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + DualArray triNeighbor2 = 
DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + DualArray triNeighbor3 = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); // The ID that maps this sphere component's geometry-defining parameters, when this component is jitified DualArray clumpComponentOffset = @@ -224,6 +233,10 @@ class DEMKinematicThread { DualArray contactTypePatch = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray previous_contactTypePatch = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + // Island label per patch contact (winner-side primitive label) + DualArray contactPatchIsland = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); + DualArray previous_contactPatchIsland = + DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); // Mapping array: maps from primitive-based pair index to patch-based pair index // Same length as primitive pair arrays (idPrimitiveA/B). For each primitive pair, @@ -340,8 +353,13 @@ class DEMKinematicThread { void populateEntityArrays(const std::vector>& input_clump_batches, const std::vector& input_ext_obj_family, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& input_mesh_facet_owner, const std::vector& input_mesh_facet_patch, + const std::vector& input_mesh_facet_neighbor1, + const std::vector& input_mesh_facet_neighbor2, + const std::vector& input_mesh_facet_neighbor3, const std::vector& input_mesh_facets, const ClumpTemplateFlatten& clump_templates, const std::vector& ext_obj_comp_num, @@ -354,8 +372,13 @@ class DEMKinematicThread { void initGPUArrays(const std::vector>& input_clump_batches, const std::vector& input_ext_obj_family, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& input_mesh_facet_owner, const std::vector& input_mesh_facet_patch, + const std::vector& 
input_mesh_facet_neighbor1, + const std::vector& input_mesh_facet_neighbor2, + const std::vector& input_mesh_facet_neighbor3, const std::vector& input_mesh_facets, const std::vector& ext_obj_comp_num, const std::vector& family_mask_matrix, @@ -366,8 +389,13 @@ class DEMKinematicThread { void updateClumpMeshArrays(const std::vector>& input_clump_batches, const std::vector& input_ext_obj_family, const std::vector& input_mesh_obj_family, + const std::vector& input_mesh_obj_convex, + const std::vector& input_mesh_obj_never_winner, const std::vector& input_mesh_facet_owner, const std::vector& input_mesh_facet_patch, + const std::vector& input_mesh_facet_neighbor1, + const std::vector& input_mesh_facet_neighbor2, + const std::vector& input_mesh_facet_neighbor3, const std::vector& input_mesh_facets, const std::vector& ext_obj_comp_num, const std::vector& family_mask_matrix, diff --git a/src/algorithms/DEMContactDetection.cu b/src/algorithms/DEMContactDetection.cu index 5565e0ee..eee8d0a9 100644 --- a/src/algorithms/DEMContactDetection.cu +++ b/src/algorithms/DEMContactDetection.cu @@ -55,11 +55,13 @@ inline void patchArraysResize(size_t nPatchInvolvedContacts, DualArray& idA, DualArray& idB, DualArray& contactTypePatch, + DualArray& contactPatchIsland, DualStruct& granData) { // Note these resizing are automatically on kT's device DEME_DUAL_ARRAY_RESIZE_NOVAL(idA, nPatchInvolvedContacts); DEME_DUAL_ARRAY_RESIZE_NOVAL(idB, nPatchInvolvedContacts); DEME_DUAL_ARRAY_RESIZE_NOVAL(contactTypePatch, nPatchInvolvedContacts); + DEME_DUAL_ARRAY_RESIZE_NOVAL(contactPatchIsland, nPatchInvolvedContacts); // Re-packing pointers now is automatic @@ -301,6 +303,8 @@ void contactDetection(std::shared_ptr& bin_sphere_kern DualArray& previous_idPatchB, DualArray& contactTypePatch, DualArray& previous_contactTypePatch, + DualArray& contactPatchIsland, + DualArray& previous_contactPatchIsland, ContactTypeMap>& typeStartCountPatchMap, DualArray& geomToPatchMap, cudaStream_t& this_stream, @@ 
-1366,48 +1370,429 @@ void contactDetection(std::shared_ptr& bin_sphere_kern isNewGroup, numTotalCnts); } - // Prefix scan gives 0-based patch-contact indices for each primitive contact. - cubDEMInclusiveScan(isNewGroup, granData->geomToPatchMap, numTotalCnts, - this_stream, scratchPad); + // Prefix scan gives 0-based group indices for each primitive contact (grouped by type + patch pair). + contactPairs_t* groupIndex = + (contactPairs_t*)scratchPad.allocateTempVector("groupIndex", numTotalCnts * sizeof(contactPairs_t)); + cubDEMInclusiveScan(isNewGroup, groupIndex, numTotalCnts, this_stream, + scratchPad); // Flip the first element to 1 so it can be used for selection flags. setFirstFlagToOne<<<1, 1, 0, this_stream>>>(isNewGroup, numTotalCnts); - scratchPad.allocateDualStruct("numUniquePatchPairs"); - cubDEMSum(isNewGroup, scratchPad.getDualStructDevice("numUniquePatchPairs"), + scratchPad.allocateDualStruct("numUniqueGroups"); + cubDEMSum(isNewGroup, scratchPad.getDualStructDevice("numUniqueGroups"), numTotalCnts, this_stream, scratchPad); - scratchPad.syncDualStructDeviceToHost("numUniquePatchPairs"); - size_t numUniquePatchPairs = *scratchPad.getDualStructHost("numUniquePatchPairs"); + scratchPad.syncDualStructDeviceToHost("numUniqueGroups"); + size_t numGroups = *scratchPad.getDualStructHost("numUniqueGroups"); + + // Select group contact types (one per group). + contact_t* groupContactTypes = nullptr; + if (numGroups > 0) { + groupContactTypes = (contact_t*)scratchPad.allocateTempVector("groupContactTypes", + numGroups * sizeof(contact_t)); + cubDEMSelectFlagged( + granData->contactTypePrimitive, groupContactTypes, isNewGroup, + scratchPad.getDualStructDevice("numUniqueGroups"), numTotalCnts, this_stream, scratchPad); + } + // Select representative primitive IDs per group (first contact in each group). 
+ bodyID_t* groupPrimA = nullptr; + bodyID_t* groupPrimB = nullptr; + if (numGroups > 0) { + groupPrimA = + (bodyID_t*)scratchPad.allocateTempVector("groupPrimA", numGroups * sizeof(bodyID_t)); + groupPrimB = + (bodyID_t*)scratchPad.allocateTempVector("groupPrimB", numGroups * sizeof(bodyID_t)); + cubDEMSelectFlagged( + granData->idPrimitiveA, groupPrimA, isNewGroup, + scratchPad.getDualStructDevice("numUniqueGroups"), numTotalCnts, this_stream, scratchPad); + cubDEMSelectFlagged( + granData->idPrimitiveB, groupPrimB, isNewGroup, + scratchPad.getDualStructDevice("numUniqueGroups"), numTotalCnts, this_stream, scratchPad); + } + + // Count unique primitives per group on each side. + contactPairs_t* groupUniqueCountA = + (contactPairs_t*)scratchPad.allocateTempVector("groupUniqueCountA", numGroups * sizeof(contactPairs_t)); + contactPairs_t* groupUniqueCountB = + (contactPairs_t*)scratchPad.allocateTempVector("groupUniqueCountB", numGroups * sizeof(contactPairs_t)); + if (numGroups > 0) { + DEME_GPU_CALL(cudaMemsetAsync(groupUniqueCountA, 0, numGroups * sizeof(contactPairs_t), this_stream)); + DEME_GPU_CALL(cudaMemsetAsync(groupUniqueCountB, 0, numGroups * sizeof(contactPairs_t), this_stream)); + } + + uint64_t* keyA = (uint64_t*)scratchPad.allocateTempVector("groupPrimKeyA", numTotalCnts * sizeof(uint64_t)); + uint64_t* keyA_sorted = + (uint64_t*)scratchPad.allocateTempVector("groupPrimKeyA_sorted", numTotalCnts * sizeof(uint64_t)); + if (blocks_needed_for_patch_ids > 0) { + buildGroupPrimitiveKeys<<>>(groupIndex, granData->idPrimitiveA, keyA, numTotalCnts); + } + cubDEMSortKeys(keyA, keyA_sorted, numTotalCnts, this_stream, scratchPad); + + uint64_t* uniqueKeyA = + (uint64_t*)scratchPad.allocateTempVector("uniqueKeyA", numTotalCnts * sizeof(uint64_t)); + scratchPad.allocateDualStruct("numUniqueKeyA"); + cubDEMUnique(keyA_sorted, uniqueKeyA, scratchPad.getDualStructDevice("numUniqueKeyA"), + numTotalCnts, this_stream, scratchPad); + 
scratchPad.syncDualStructDeviceToHost("numUniqueKeyA"); + size_t numUniqueKeyA = *scratchPad.getDualStructHost("numUniqueKeyA"); + if (numUniqueKeyA > 0) { + contactPairs_t* uniqueGroupA = (contactPairs_t*)scratchPad.allocateTempVector( + "uniqueGroupA", numUniqueKeyA * sizeof(contactPairs_t)); + size_t blocks_needed_unique = + (numUniqueKeyA + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; + extractGroupIndexFromKey<<>>(uniqueKeyA, uniqueGroupA, numUniqueKeyA); + + contactPairs_t* uniqueGroupsA = (contactPairs_t*)scratchPad.allocateTempVector( + "uniqueGroupsA", numUniqueKeyA * sizeof(contactPairs_t)); + contactPairs_t* countsA = (contactPairs_t*)scratchPad.allocateTempVector( + "uniqueCountsA", numUniqueKeyA * sizeof(contactPairs_t)); + scratchPad.allocateDualStruct("numGroupsA"); + cubDEMRunLengthEncode( + uniqueGroupA, uniqueGroupsA, countsA, scratchPad.getDualStructDevice("numGroupsA"), + numUniqueKeyA, this_stream, scratchPad); + scratchPad.syncDualStructDeviceToHost("numGroupsA"); + size_t numGroupsA = *scratchPad.getDualStructHost("numGroupsA"); + if (numGroupsA > 0) { + size_t blocks_needed_groups = + (numGroupsA + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; + scatterGroupCounts<<>>( + uniqueGroupsA, countsA, groupUniqueCountA, numGroupsA); + } + scratchPad.finishUsingTempVector("uniqueGroupsA"); + scratchPad.finishUsingTempVector("uniqueCountsA"); + scratchPad.finishUsingDualStruct("numGroupsA"); + scratchPad.finishUsingTempVector("uniqueGroupA"); + } + scratchPad.finishUsingTempVector("uniqueKeyA"); + scratchPad.finishUsingDualStruct("numUniqueKeyA"); + scratchPad.finishUsingTempVector("groupPrimKeyA_sorted"); + scratchPad.finishUsingTempVector("groupPrimKeyA"); + + uint64_t* keyB = (uint64_t*)scratchPad.allocateTempVector("groupPrimKeyB", numTotalCnts * sizeof(uint64_t)); + uint64_t* keyB_sorted = + (uint64_t*)scratchPad.allocateTempVector("groupPrimKeyB_sorted", numTotalCnts * sizeof(uint64_t)); + if 
(blocks_needed_for_patch_ids > 0) { + buildGroupPrimitiveKeys<<>>(groupIndex, granData->idPrimitiveB, keyB, numTotalCnts); + } + cubDEMSortKeys(keyB, keyB_sorted, numTotalCnts, this_stream, scratchPad); + + uint64_t* uniqueKeyB = + (uint64_t*)scratchPad.allocateTempVector("uniqueKeyB", numTotalCnts * sizeof(uint64_t)); + scratchPad.allocateDualStruct("numUniqueKeyB"); + cubDEMUnique(keyB_sorted, uniqueKeyB, scratchPad.getDualStructDevice("numUniqueKeyB"), + numTotalCnts, this_stream, scratchPad); + scratchPad.syncDualStructDeviceToHost("numUniqueKeyB"); + size_t numUniqueKeyB = *scratchPad.getDualStructHost("numUniqueKeyB"); + if (numUniqueKeyB > 0) { + contactPairs_t* uniqueGroupB = (contactPairs_t*)scratchPad.allocateTempVector( + "uniqueGroupB", numUniqueKeyB * sizeof(contactPairs_t)); + size_t blocks_needed_unique = + (numUniqueKeyB + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; + extractGroupIndexFromKey<<>>(uniqueKeyB, uniqueGroupB, numUniqueKeyB); + + contactPairs_t* uniqueGroupsB = (contactPairs_t*)scratchPad.allocateTempVector( + "uniqueGroupsB", numUniqueKeyB * sizeof(contactPairs_t)); + contactPairs_t* countsB = (contactPairs_t*)scratchPad.allocateTempVector( + "uniqueCountsB", numUniqueKeyB * sizeof(contactPairs_t)); + scratchPad.allocateDualStruct("numGroupsB"); + cubDEMRunLengthEncode( + uniqueGroupB, uniqueGroupsB, countsB, scratchPad.getDualStructDevice("numGroupsB"), + numUniqueKeyB, this_stream, scratchPad); + scratchPad.syncDualStructDeviceToHost("numGroupsB"); + size_t numGroupsB = *scratchPad.getDualStructHost("numGroupsB"); + if (numGroupsB > 0) { + size_t blocks_needed_groups = + (numGroupsB + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; + scatterGroupCounts<<>>( + uniqueGroupsB, countsB, groupUniqueCountB, numGroupsB); + } + scratchPad.finishUsingTempVector("uniqueGroupsB"); + scratchPad.finishUsingTempVector("uniqueCountsB"); + scratchPad.finishUsingDualStruct("numGroupsB"); + 
scratchPad.finishUsingTempVector("uniqueGroupB"); + } + scratchPad.finishUsingTempVector("uniqueKeyB"); + scratchPad.finishUsingDualStruct("numUniqueKeyB"); + scratchPad.finishUsingTempVector("groupPrimKeyB_sorted"); + scratchPad.finishUsingTempVector("groupPrimKeyB"); + + // Decide winner side per group. + notStupidBool_t* groupWinnerIsA = + (notStupidBool_t*)scratchPad.allocateTempVector("groupWinnerIsA", numGroups * sizeof(notStupidBool_t)); + notStupidBool_t* groupWinnerIsTri = (notStupidBool_t*)scratchPad.allocateTempVector( + "groupWinnerIsTri", numGroups * sizeof(notStupidBool_t)); + notStupidBool_t* groupForceSingleIsland = (notStupidBool_t*)scratchPad.allocateTempVector( + "groupForceSingleIsland", numGroups * sizeof(notStupidBool_t)); + if (numGroups > 0) { + size_t blocks_needed_groups = + (numGroups + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; + computeGroupWinners<<>>( + groupContactTypes, groupPrimA, groupPrimB, groupUniqueCountA, groupUniqueCountB, + granData->ownerTriMesh, granData->ownerMeshConvex, granData->ownerMeshNeverWinner, groupWinnerIsA, + groupWinnerIsTri, groupForceSingleIsland, numGroups); + } + + // Winner primitive per contact. + bodyID_t* winnerPrimitive = + (bodyID_t*)scratchPad.allocateTempVector("winnerPrimitive", numTotalCnts * sizeof(bodyID_t)); + notStupidBool_t* winnerIsTri = + (notStupidBool_t*)scratchPad.allocateTempVector("winnerIsTri", numTotalCnts * sizeof(notStupidBool_t)); + if (blocks_needed_for_patch_ids > 0) { + selectWinnerPrimitive<<>>(groupIndex, granData->idPrimitiveA, granData->idPrimitiveB, + groupWinnerIsA, groupWinnerIsTri, groupForceSingleIsland, + winnerPrimitive, winnerIsTri, numTotalCnts); + } + + // Build active triangle keys and compact. 
+ uint64_t* activeTriKeysAll = + (uint64_t*)scratchPad.allocateTempVector("activeTriKeysAll", numTotalCnts * sizeof(uint64_t)); + notStupidBool_t* activeTriFlags = winnerIsTri; // reuse winnerIsTri as flags + if (blocks_needed_for_patch_ids > 0) { + buildActiveTriKeys<<>>(groupIndex, winnerPrimitive, activeTriFlags, activeTriKeysAll, + activeTriFlags, numTotalCnts); + } + uint64_t* activeTriKeys = + (uint64_t*)scratchPad.allocateTempVector("activeTriKeys", numTotalCnts * sizeof(uint64_t)); + scratchPad.allocateDualStruct("numActiveTriKeys"); + cubDEMSelectFlagged(activeTriKeysAll, activeTriKeys, activeTriFlags, + scratchPad.getDualStructDevice("numActiveTriKeys"), + numTotalCnts, this_stream, scratchPad); + scratchPad.syncDualStructDeviceToHost("numActiveTriKeys"); + size_t numActiveTriKeys = *scratchPad.getDualStructHost("numActiveTriKeys"); + + uint64_t* activeTriKeysUnique = nullptr; + bodyID_t* activeLabelsA = nullptr; + bodyID_t* activeLabelsB = nullptr; + contactPairs_t* groupActiveCount = nullptr; + contactPairs_t* groupActiveStart = nullptr; + bodyID_t* finalActiveLabels = nullptr; + size_t numUniqueActiveTri = 0; + + if (numActiveTriKeys > 0) { + uint64_t* activeTriKeys_sorted = (uint64_t*)scratchPad.allocateTempVector( + "activeTriKeys_sorted", numActiveTriKeys * sizeof(uint64_t)); + cubDEMSortKeys(activeTriKeys, activeTriKeys_sorted, numActiveTriKeys, this_stream, + scratchPad); + + activeTriKeysUnique = (uint64_t*)scratchPad.allocateTempVector( + "activeTriKeys_unique", numActiveTriKeys * sizeof(uint64_t)); + scratchPad.allocateDualStruct("numUniqueActiveTri"); + cubDEMUnique(activeTriKeys_sorted, activeTriKeysUnique, + scratchPad.getDualStructDevice("numUniqueActiveTri"), numActiveTriKeys, + this_stream, scratchPad); + scratchPad.syncDualStructDeviceToHost("numUniqueActiveTri"); + numUniqueActiveTri = *scratchPad.getDualStructHost("numUniqueActiveTri"); + + if (numUniqueActiveTri > 0) { + activeLabelsA = (bodyID_t*)scratchPad.allocateTempVector( + 
"activeTriLabelsA", numUniqueActiveTri * sizeof(bodyID_t)); + activeLabelsB = (bodyID_t*)scratchPad.allocateTempVector( + "activeTriLabelsB", numUniqueActiveTri * sizeof(bodyID_t)); + size_t blocks_needed_active = + (numUniqueActiveTri + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; + initActiveTriLabels<<>>(activeTriKeysUnique, activeLabelsA, numUniqueActiveTri); + initActiveTriLabels<<>>(activeTriKeysUnique, activeLabelsB, numUniqueActiveTri); + + groupActiveCount = (contactPairs_t*)scratchPad.allocateTempVector( + "groupActiveCount", numGroups * sizeof(contactPairs_t)); + DEME_GPU_CALL(cudaMemsetAsync(groupActiveCount, 0, numGroups * sizeof(contactPairs_t), this_stream)); + countActiveTriPerGroup<<>>(activeTriKeysUnique, groupActiveCount, numUniqueActiveTri); + + groupActiveStart = (contactPairs_t*)scratchPad.allocateTempVector( + "groupActiveStart", numGroups * sizeof(contactPairs_t)); + if (numGroups > 0) { + cubDEMPrefixScan(groupActiveCount, groupActiveStart, numGroups, + this_stream, scratchPad); + } + + // Label propagation iterations. + const int kLabelIters = 4; + bodyID_t* labelsIn = activeLabelsA; + bodyID_t* labelsOut = activeLabelsB; + for (int iter = 0; iter < kLabelIters; ++iter) { + propagateActiveTriLabels<<>>(activeTriKeysUnique, labelsIn, labelsOut, + groupActiveStart, groupActiveCount, + granData->triNeighbor1, granData->triNeighbor2, + granData->triNeighbor3, numUniqueActiveTri); + bodyID_t* tmp = labelsIn; + labelsIn = labelsOut; + labelsOut = tmp; + } + finalActiveLabels = labelsIn; + } + + scratchPad.finishUsingTempVector("activeTriKeys_sorted"); + scratchPad.finishUsingDualStruct("numUniqueActiveTri"); + } + + // Assign island label per contact (winner primitive label or propagated triangle label). 
+ bodyID_t* contactIslandLabel = + (bodyID_t*)scratchPad.allocateTempVector("contactIslandLabel", numTotalCnts * sizeof(bodyID_t)); + if (numUniqueActiveTri > 0) { + size_t blocks_needed_labels = + (numTotalCnts + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; + assignContactIslandLabel<<>>(groupIndex, winnerPrimitive, winnerIsTri, activeTriKeysUnique, + finalActiveLabels, groupActiveStart, groupActiveCount, + contactIslandLabel, numTotalCnts); + } else { + size_t blocks_needed_labels = + (numTotalCnts + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; + if (blocks_needed_labels > 0) { + copyBodyIDArray<<>>( + winnerPrimitive, contactIslandLabel, numTotalCnts); + } + } + + // Build composite key parts (primary + secondary) for island grouping. + uint64_t* islandKeyHi = + (uint64_t*)scratchPad.allocateTempVector("islandKeyHi", numTotalCnts * sizeof(uint64_t)); + uint64_t* islandKeyLo = + (uint64_t*)scratchPad.allocateTempVector("islandKeyLo", numTotalCnts * sizeof(uint64_t)); + if (blocks_needed_for_patch_ids > 0) { + buildIslandCompositeKeyParts<<>>(contactPatchPairs, granData->contactTypePrimitive, + contactIslandLabel, islandKeyHi, islandKeyLo, + numTotalCnts); + } - if (numUniquePatchPairs > idPatchA.size()) { - DEME_DUAL_ARRAY_RESIZE_NOVAL(idPatchA, numUniquePatchPairs); - DEME_DUAL_ARRAY_RESIZE_NOVAL(idPatchB, numUniquePatchPairs); - DEME_DUAL_ARRAY_RESIZE_NOVAL(contactTypePatch, numUniquePatchPairs); + contactPairs_t* island_sort_indices = + (contactPairs_t*)scratchPad.allocateTempVector("islandSortIndices", idx_arr_bytes); + contactPairs_t* island_sort_indices_sorted = + (contactPairs_t*)scratchPad.allocateTempVector("islandSortIndices_sorted", idx_arr_bytes); + if (blocks_needed_for_patch_ids > 0) { + lineNumbers<<>>( + island_sort_indices, numTotalCnts); + } + + // Two-pass stable sort to avoid the CUDA 13/CUB compile error with ulonglong2 (128-bit) keys. 
+ uint64_t* islandKeyLo_sorted = + (uint64_t*)scratchPad.allocateTempVector("islandKeyLo_sorted", numTotalCnts * sizeof(uint64_t)); + cubDEMSortByKeys(islandKeyLo, islandKeyLo_sorted, island_sort_indices, + island_sort_indices_sorted, numTotalCnts, this_stream, + scratchPad); + + uint64_t* islandKeyHi_by_lo = + (uint64_t*)scratchPad.allocateTempVector("islandKeyHi_by_lo", numTotalCnts * sizeof(uint64_t)); + if (blocks_needed_for_patch_ids > 0) { + gatherByIndex + <<>>( + islandKeyHi, islandKeyHi_by_lo, island_sort_indices_sorted, numTotalCnts); + } + + uint64_t* islandKeyHi_sorted = + (uint64_t*)scratchPad.allocateTempVector("islandKeyHi_sorted", numTotalCnts * sizeof(uint64_t)); + // Stable sort by primary key (contactType + patchA), preserving low-key order. + cubDEMSortByKeys(islandKeyHi_by_lo, islandKeyHi_sorted, + island_sort_indices_sorted, island_sort_indices, numTotalCnts, + this_stream, scratchPad); + + // Reorder primitive arrays by island keys. + if (blocks_needed_for_patch_ids > 0) { + gatherByIndex + <<>>( + granData->idPrimitiveA, idA_sorted, island_sort_indices, numTotalCnts); + gatherByIndex + <<>>( + granData->idPrimitiveB, idB_sorted, island_sort_indices, numTotalCnts); + gatherByIndex + <<>>( + granData->contactTypePrimitive, contactType_sorted, island_sort_indices, numTotalCnts); + gatherByIndex + <<>>( + granData->contactPersistency, contactPersistency_sorted, island_sort_indices, + numTotalCnts); + gatherByIndex + <<>>( + contactPatchPairs, patchPairs_sorted, island_sort_indices, numTotalCnts); + } + + bodyID_t* contactIslandLabel_sorted = (bodyID_t*)scratchPad.allocateTempVector( + "contactIslandLabel_sorted", numTotalCnts * sizeof(bodyID_t)); + if (blocks_needed_for_patch_ids > 0) { + gatherByIndex + <<>>( + contactIslandLabel, contactIslandLabel_sorted, island_sort_indices, numTotalCnts); + } + + DEME_GPU_CALL(cudaMemcpyAsync(granData->idPrimitiveA, idA_sorted, total_ids_bytes, + cudaMemcpyDeviceToDevice, this_stream)); + 
DEME_GPU_CALL(cudaMemcpyAsync(granData->idPrimitiveB, idB_sorted, total_ids_bytes, + cudaMemcpyDeviceToDevice, this_stream)); + DEME_GPU_CALL(cudaMemcpyAsync(granData->contactTypePrimitive, contactType_sorted, type_arr_bytes, + cudaMemcpyDeviceToDevice, this_stream)); + DEME_GPU_CALL(cudaMemcpyAsync(granData->contactPersistency, contactPersistency_sorted, + total_persistency_bytes, cudaMemcpyDeviceToDevice, this_stream)); + DEME_GPU_CALL(cudaMemcpyAsync(contactPatchPairs, patchPairs_sorted, patch_arr_bytes, + cudaMemcpyDeviceToDevice, this_stream)); + + // Build final geomToPatchMap based on island keys. + contactPairs_t* isNewIslandGroup = + (contactPairs_t*)scratchPad.allocateTempVector("isNewIslandGroup", numTotalCnts * sizeof(contactPairs_t)); + if (blocks_needed_for_patch_ids > 0) { + uint64_t* islandKeyLo_sorted_by_hi = (uint64_t*)scratchPad.allocateTempVector( + "islandKeyLo_sorted_by_hi", numTotalCnts * sizeof(uint64_t)); + gatherByIndex + <<>>( + islandKeyLo, islandKeyLo_sorted_by_hi, island_sort_indices, numTotalCnts); + markNewCompositeGroups64<<>>(islandKeyHi_sorted, islandKeyLo_sorted_by_hi, + isNewIslandGroup, numTotalCnts); + scratchPad.finishUsingTempVector("islandKeyLo_sorted_by_hi"); + } + cubDEMInclusiveScan(isNewIslandGroup, granData->geomToPatchMap, numTotalCnts, + this_stream, scratchPad); + setFirstFlagToOne<<<1, 1, 0, this_stream>>>(isNewIslandGroup, numTotalCnts); + + scratchPad.allocateDualStruct("numUniqueIslands"); + cubDEMSum(isNewIslandGroup, scratchPad.getDualStructDevice("numUniqueIslands"), + numTotalCnts, this_stream, scratchPad); + scratchPad.syncDualStructDeviceToHost("numUniqueIslands"); + size_t numUniqueIslands = *scratchPad.getDualStructHost("numUniqueIslands"); + + if (numUniqueIslands > idPatchA.size()) { + DEME_DUAL_ARRAY_RESIZE_NOVAL(idPatchA, numUniqueIslands); + DEME_DUAL_ARRAY_RESIZE_NOVAL(idPatchB, numUniqueIslands); + DEME_DUAL_ARRAY_RESIZE_NOVAL(contactTypePatch, numUniqueIslands); + 
DEME_DUAL_ARRAY_RESIZE_NOVAL(contactPatchIsland, numUniqueIslands); granData.toDevice(); } patchIDPair_t* unique_patch_pairs = nullptr; - if (numUniquePatchPairs > 0) { + if (numUniqueIslands > 0) { unique_patch_pairs = (patchIDPair_t*)scratchPad.allocateTempVector( - "unique_patch_pairs", numUniquePatchPairs * sizeof(patchIDPair_t)); + "unique_patch_pairs", numUniqueIslands * sizeof(patchIDPair_t)); cubDEMSelectFlagged( - contactPatchPairs, unique_patch_pairs, isNewGroup, - scratchPad.getDualStructDevice("numUniquePatchPairs"), numTotalCnts, this_stream, scratchPad); + contactPatchPairs, unique_patch_pairs, isNewIslandGroup, + scratchPad.getDualStructDevice("numUniqueIslands"), numTotalCnts, this_stream, scratchPad); cubDEMSelectFlagged( - granData->contactTypePrimitive, granData->contactTypePatch, isNewGroup, - scratchPad.getDualStructDevice("numUniquePatchPairs"), numTotalCnts, this_stream, scratchPad); + granData->contactTypePrimitive, granData->contactTypePatch, isNewIslandGroup, + scratchPad.getDualStructDevice("numUniqueIslands"), numTotalCnts, this_stream, scratchPad); + cubDEMSelectFlagged( + contactIslandLabel_sorted, contactPatchIsland.data(), isNewIslandGroup, + scratchPad.getDualStructDevice("numUniqueIslands"), numTotalCnts, this_stream, scratchPad); size_t blocks_needed_for_decode = - (numUniquePatchPairs + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; + (numUniqueIslands + DEME_MAX_THREADS_PER_BLOCK - 1) / DEME_MAX_THREADS_PER_BLOCK; decodePatchPairsToSeparateArrays<<>>(unique_patch_pairs, granData->idPatchA, - granData->idPatchB, numUniquePatchPairs); + granData->idPatchB, numUniqueIslands); } - *scratchPad.numContacts = numUniquePatchPairs; + *scratchPad.numContacts = numUniqueIslands; // Build per-type start/count map for patch contacts. 
typeStartCountPatchMap_thisStep.SetAll({0, 0}); - if (numUniquePatchPairs > 0) { + if (numUniqueIslands > 0) { contact_t* unique_types = (contact_t*)scratchPad.allocateTempVector( "unique_types", NUM_SUPPORTED_CONTACT_TYPES * sizeof(contact_t)); contactPairs_t* type_counts = (contactPairs_t*)scratchPad.allocateTempVector( @@ -1416,7 +1801,7 @@ void contactDetection(std::shared_ptr& bin_sphere_kern cubDEMRunLengthEncode(granData->contactTypePatch, unique_types, type_counts, scratchPad.getDualStructDevice("numUniqueTypes"), - numUniquePatchPairs, this_stream, scratchPad); + numUniqueIslands, this_stream, scratchPad); scratchPad.syncDualStructDeviceToHost("numUniqueTypes"); size_t numTypes = *scratchPad.getDualStructHost("numUniqueTypes"); @@ -1443,8 +1828,54 @@ void contactDetection(std::shared_ptr& bin_sphere_kern if (unique_patch_pairs) { scratchPad.finishUsingTempVector("unique_patch_pairs"); } + scratchPad.finishUsingTempVector("contactIslandLabel_sorted"); + if (groupContactTypes) { + scratchPad.finishUsingTempVector("groupContactTypes"); + } + if (groupPrimA) { + scratchPad.finishUsingTempVector("groupPrimA"); + } + if (groupPrimB) { + scratchPad.finishUsingTempVector("groupPrimB"); + } + scratchPad.finishUsingTempVector("groupUniqueCountA"); + scratchPad.finishUsingTempVector("groupUniqueCountB"); + scratchPad.finishUsingTempVector("groupWinnerIsA"); + scratchPad.finishUsingTempVector("groupWinnerIsTri"); + scratchPad.finishUsingTempVector("groupForceSingleIsland"); + scratchPad.finishUsingTempVector("winnerPrimitive"); + scratchPad.finishUsingTempVector("winnerIsTri"); + scratchPad.finishUsingTempVector("activeTriKeysAll"); + scratchPad.finishUsingTempVector("activeTriKeys"); + scratchPad.finishUsingDualStruct("numActiveTriKeys"); + if (activeTriKeysUnique) { + scratchPad.finishUsingTempVector("activeTriKeys_unique"); + } + if (activeLabelsA) { + scratchPad.finishUsingTempVector("activeTriLabelsA"); + } + if (activeLabelsB) { + 
scratchPad.finishUsingTempVector("activeTriLabelsB"); + } + if (groupActiveCount) { + scratchPad.finishUsingTempVector("groupActiveCount"); + } + if (groupActiveStart) { + scratchPad.finishUsingTempVector("groupActiveStart"); + } + scratchPad.finishUsingTempVector("contactIslandLabel"); + scratchPad.finishUsingTempVector("islandKeyHi"); + scratchPad.finishUsingTempVector("islandKeyLo"); + scratchPad.finishUsingTempVector("islandKeyLo_sorted"); + scratchPad.finishUsingTempVector("islandKeyHi_by_lo"); + scratchPad.finishUsingTempVector("islandKeyHi_sorted"); + scratchPad.finishUsingTempVector("islandSortIndices"); + scratchPad.finishUsingTempVector("islandSortIndices_sorted"); + scratchPad.finishUsingTempVector("isNewIslandGroup"); + scratchPad.finishUsingDualStruct("numUniqueIslands"); + scratchPad.finishUsingTempVector("groupIndex"); scratchPad.finishUsingTempVector("isNewGroup"); - scratchPad.finishUsingDualStruct("numUniquePatchPairs"); + scratchPad.finishUsingDualStruct("numUniqueGroups"); scratchPad.finishUsingTempVector("contactPatchPairs"); scratchPad.finishUsingTempVector("patchPairs_sorted"); scratchPad.finishUsingTempVector("contactSortIndices"); @@ -1517,9 +1948,9 @@ void contactDetection(std::shared_ptr& bin_sphere_kern // Both steps have contacts of this type - perform mapping buildPatchContactMappingForType<<>>( - granData->idPatchA, granData->idPatchB, granData->previous_idPatchA, - granData->previous_idPatchB, granData->contactMapping, curr_start, curr_count, prev_start, - prev_count); + granData->idPatchA, granData->idPatchB, granData->contactPatchIsland, + granData->previous_idPatchA, granData->previous_idPatchB, granData->previous_contactPatchIsland, + granData->contactMapping, curr_start, curr_count, prev_start, prev_count); } } // Synchronize once after all mapping kernels are launched @@ -1531,7 +1962,7 @@ void contactDetection(std::shared_ptr& bin_sphere_kern size_t patch_type_arr_bytes = (*scratchPad.numContacts) * sizeof(contact_t); if 
(*scratchPad.numContacts > previous_idPatchA.size()) { patchArraysResize(*scratchPad.numContacts, previous_idPatchA, previous_idPatchB, - previous_contactTypePatch, granData); + previous_contactTypePatch, previous_contactPatchIsland, granData); } int dev = 0; DEME_GPU_CALL(cudaGetDevice(&dev)); @@ -1540,6 +1971,7 @@ void contactDetection(std::shared_ptr& bin_sphere_kern xt.add(granData->previous_idPatchA, granData->idPatchA, patch_id_arr_bytes); xt.add(granData->previous_idPatchB, granData->idPatchB, patch_id_arr_bytes); xt.add(granData->previous_contactTypePatch, granData->contactTypePatch, patch_type_arr_bytes); + xt.add(granData->previous_contactPatchIsland, granData->contactPatchIsland, patch_id_arr_bytes); xt.run(dev, dev, this_stream); } @@ -1600,6 +2032,7 @@ void overwritePrevContactArrays(DualStruct& kT_data, DualArray& previous_idPatchA, DualArray& previous_idPatchB, DualArray& previous_contactTypePatch, + DualArray& previous_contactPatchIsland, ContactTypeMap>& typeStartCountPatchMap, DualStruct& simParams, DEMSolverScratchData& scratchPad, @@ -1607,7 +2040,8 @@ void overwritePrevContactArrays(DualStruct& kT_data, size_t nContacts) { // Make sure the storage is large enough if (nContacts > previous_idPatchA.size()) { - patchArraysResize(nContacts, previous_idPatchA, previous_idPatchB, previous_contactTypePatch, kT_data); + patchArraysResize(nContacts, previous_idPatchA, previous_idPatchB, previous_contactTypePatch, + previous_contactPatchIsland, kT_data); } // No sort, copy over @@ -1618,6 +2052,7 @@ void overwritePrevContactArrays(DualStruct& kT_data, xt.add(kT_data->previous_idPatchA, dT_data->idPatchA, nContacts * sizeof(bodyID_t)); xt.add(kT_data->previous_idPatchB, dT_data->idPatchB, nContacts * sizeof(bodyID_t)); xt.add(kT_data->previous_contactTypePatch, dT_data->contactTypePatch, nContacts * sizeof(contact_t)); + xt.add(kT_data->previous_contactPatchIsland, dT_data->contactPatchIsland, nContacts * sizeof(bodyID_t)); xt.run(dev, dev, this_stream); 
} diff --git a/src/algorithms/DEMContactDetectionKernels.cuh b/src/algorithms/DEMContactDetectionKernels.cuh index 52d54b28..aa52a54e 100644 --- a/src/algorithms/DEMContactDetectionKernels.cuh +++ b/src/algorithms/DEMContactDetectionKernels.cuh @@ -248,6 +248,312 @@ __global__ void gatherByIndex(const T* in, T* out, const contactPairs_t* idx, si } } +// Build packed (groupIndex, primitiveID) keys for unique counting. +__global__ void buildGroupPrimitiveKeys(const contactPairs_t* groupIndex, + const bodyID_t* primitiveIDs, + uint64_t* keys, + size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + keys[myID] = (static_cast(groupIndex[myID]) << 32) | static_cast(primitiveIDs[myID]); + } +} + +// Extract group indices from packed keys (high 32 bits). +__global__ void extractGroupIndexFromKey(const uint64_t* keys, contactPairs_t* groupIndex, size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + groupIndex[myID] = static_cast(keys[myID] >> 32); + } +} + +// Scatter run-length counts into dense per-group counters. +__global__ void scatterGroupCounts(const contactPairs_t* groupIDs, + const contactPairs_t* counts, + contactPairs_t* groupCounts, + size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + groupCounts[groupIDs[myID]] = counts[myID]; + } +} + +// Determine winner side for each group. 
+__global__ void computeGroupWinners(const contact_t* groupTypes, + const bodyID_t* groupPrimA, + const bodyID_t* groupPrimB, + const contactPairs_t* countA, + const contactPairs_t* countB, + const bodyID_t* ownerTriMesh, + const notStupidBool_t* ownerMeshConvex, + const notStupidBool_t* ownerMeshNeverWinner, + notStupidBool_t* winnerIsA, + notStupidBool_t* winnerIsTri, + notStupidBool_t* forceSingleIsland, + size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + const contact_t ctype = groupTypes[myID]; + const geoType_t typeA = decodeTypeA(ctype); + const geoType_t typeB = decodeTypeB(ctype); + const contactPairs_t nA = countA[myID]; + const contactPairs_t nB = countB[myID]; + const bool A_is_tri = (typeA == GEO_T_TRIANGLE); + const bool B_is_tri = (typeB == GEO_T_TRIANGLE); + bool A_convex = false; + bool B_convex = false; + bool A_never = false; + bool B_never = false; + if (A_is_tri) { + const bodyID_t ownerA = ownerTriMesh[groupPrimA[myID]]; + if (ownerA != NULL_BODYID) { + A_convex = (ownerMeshConvex[ownerA] != 0); + A_never = (ownerMeshNeverWinner[ownerA] != 0); + } + } + if (B_is_tri) { + const bodyID_t ownerB = ownerTriMesh[groupPrimB[myID]]; + if (ownerB != NULL_BODYID) { + B_convex = (ownerMeshConvex[ownerB] != 0); + B_never = (ownerMeshNeverWinner[ownerB] != 0); + } + } + const bool single_island = (A_is_tri && B_is_tri && A_convex && B_convex); + forceSingleIsland[myID] = single_island ? 1 : 0; + + notStupidBool_t pickA = 0; + if (A_never && !B_never) { + pickA = 0; + } else if (B_never && !A_never) { + pickA = 1; + } else if (nA > nB) { + pickA = 1; + } else if (nA < nB) { + pickA = 0; + } else { + if (A_is_tri && B_is_tri) { + if (A_convex != B_convex) { + pickA = A_convex ? 
0 : 1; // prefer concave if tied + } else { + pickA = 0; // deterministic tie-break: prefer B + } + } else if (A_is_tri && !B_is_tri) { + pickA = 1; + } else if (B_is_tri && !A_is_tri) { + pickA = 0; + } else { + pickA = 0; // deterministic tie-break: prefer B + } + } + winnerIsA[myID] = pickA; + if (single_island) { + winnerIsTri[myID] = 0; + } else { + const geoType_t winnerType = (pickA ? typeA : typeB); + winnerIsTri[myID] = (winnerType == GEO_T_TRIANGLE) ? 1 : 0; + } + } +} + +// Select winner primitive and flag if it is a triangle. +__global__ void selectWinnerPrimitive(const contactPairs_t* groupIndex, + const bodyID_t* idA, + const bodyID_t* idB, + const notStupidBool_t* groupWinnerIsA, + const notStupidBool_t* groupWinnerIsTri, + const notStupidBool_t* groupForceSingleIsland, + bodyID_t* winnerID, + notStupidBool_t* winnerIsTri, + size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + const contactPairs_t grp = groupIndex[myID]; + if (groupForceSingleIsland[grp] != 0) { + winnerID[myID] = 0; + winnerIsTri[myID] = 0; + return; + } + const bool pickA = (groupWinnerIsA[grp] != 0); + winnerID[myID] = pickA ? idA[myID] : idB[myID]; + winnerIsTri[myID] = groupWinnerIsTri[grp]; + } +} + +// Build active triangle keys for compacting (groupIndex, triID). +__global__ void buildActiveTriKeys(const contactPairs_t* groupIndex, + const bodyID_t* winnerID, + const notStupidBool_t* winnerIsTri, + uint64_t* keys, + notStupidBool_t* flags, + size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + const notStupidBool_t is_tri = winnerIsTri[myID]; + flags[myID] = is_tri; + if (is_tri) { + keys[myID] = (static_cast(groupIndex[myID]) << 32) | static_cast(winnerID[myID]); + } else { + keys[myID] = 0; + } + } +} + +// Initialize labels from active triangle keys (label = triID). 
+__global__ void initActiveTriLabels(const uint64_t* keys, bodyID_t* labels, size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + labels[myID] = static_cast(keys[myID] & 0xffffffffull); + } +} + +// Count active triangles per group (atomic add). +__global__ void countActiveTriPerGroup(const uint64_t* keys, contactPairs_t* groupCounts, size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + const contactPairs_t grp = static_cast(keys[myID] >> 32); + atomicAdd(&groupCounts[grp], (contactPairs_t)1); + } +} + +// Label propagation for active triangles within each group. +__global__ void propagateActiveTriLabels(const uint64_t* keys, + const bodyID_t* labelsIn, + bodyID_t* labelsOut, + const contactPairs_t* groupStart, + const contactPairs_t* groupCount, + const bodyID_t* triNeighbor1, + const bodyID_t* triNeighbor2, + const bodyID_t* triNeighbor3, + size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + const uint64_t key = keys[myID]; + const contactPairs_t grp = static_cast(key >> 32); + const bodyID_t triID = static_cast(key & 0xffffffffull); + const contactPairs_t start = groupStart[grp]; + const contactPairs_t count = groupCount[grp]; + bodyID_t label = labelsIn[myID]; + + bodyID_t nbs[3] = {triNeighbor1[triID], triNeighbor2[triID], triNeighbor3[triID]}; + for (int e = 0; e < 3; ++e) { + const bodyID_t nb = nbs[e]; + if (nb == NULL_BODYID || count == 0) { + continue; + } + const uint64_t target = (static_cast(grp) << 32) | static_cast(nb); + contactPairs_t left = 0; + contactPairs_t right = count; + while (left < right) { + contactPairs_t mid = left + (right - left) / 2; + const uint64_t mid_key = keys[start + mid]; + if (mid_key < target) { + left = mid + 1; + } else { + right = mid; + } + } + if (left < count) { + const uint64_t found = keys[start + left]; + if (found == target) { + const bodyID_t nb_label = labelsIn[start + left]; + if 
(nb_label < label) { + label = nb_label; + } + } + } + } + labelsOut[myID] = label; + } +} + +// Assign per-contact island labels using winner primitive and active triangle labels. +__global__ void assignContactIslandLabel(const contactPairs_t* groupIndex, + const bodyID_t* winnerID, + const notStupidBool_t* winnerIsTri, + const uint64_t* activeKeys, + const bodyID_t* activeLabels, + const contactPairs_t* groupStart, + const contactPairs_t* groupCount, + bodyID_t* outLabels, + size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + const bodyID_t prim = winnerID[myID]; + if (winnerIsTri[myID] == 0) { + outLabels[myID] = prim; + return; + } + const contactPairs_t grp = groupIndex[myID]; + const contactPairs_t start = groupStart[grp]; + const contactPairs_t count = groupCount[grp]; + if (count == 0) { + outLabels[myID] = prim; + return; + } + const uint64_t target = (static_cast(grp) << 32) | static_cast(prim); + contactPairs_t left = 0; + contactPairs_t right = count; + while (left < right) { + contactPairs_t mid = left + (right - left) / 2; + const uint64_t mid_key = activeKeys[start + mid]; + if (mid_key < target) { + left = mid + 1; + } else { + right = mid; + } + } + if (left < count && activeKeys[start + left] == target) { + outLabels[myID] = activeLabels[start + left]; + } else { + outLabels[myID] = prim; + } + } +} + +// Simple copy kernel for bodyID arrays. +__global__ void copyBodyIDArray(const bodyID_t* in, bodyID_t* out, size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + out[myID] = in[myID]; + } +} + +// Build composite key parts (contactType + patchA, patchB + label) for island grouping. 
+__global__ void buildIslandCompositeKeyParts(const patchIDPair_t* patchPairs, + const contact_t* contactTypes, + const bodyID_t* labels, + uint64_t* key_hi, + uint64_t* key_lo, + size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + const patchIDPair_t pair = patchPairs[myID]; + const uint64_t hi = static_cast(pair >> 32); + const uint64_t lo = static_cast(pair & 0xffffffffull); + // key_hi: contactType + patchA (primary key) + key_hi[myID] = (static_cast(contactTypes[myID]) << 32) | hi; + // key_lo: patchB + island label (secondary key) + key_lo[myID] = (lo << 32) | static_cast(labels[myID]); + } +} + +// Mark new composite groups for sorted (key_hi, key_lo) arrays. +__global__ void markNewCompositeGroups64(const uint64_t* key_hi, + const uint64_t* key_lo, + contactPairs_t* isNewGroup, + size_t n) { + contactPairs_t myID = blockIdx.x * blockDim.x + threadIdx.x; + if (myID < n) { + if (myID == 0) { + isNewGroup[myID] = 0; + } else { + const bool new_hi = key_hi[myID] != key_hi[myID - 1]; + const bool new_lo = key_lo[myID] != key_lo[myID - 1]; + isNewGroup[myID] = (new_hi || new_lo) ? 1 : 0; + } + } +} + // Build a sortable 64-bit key from (idB, contactType, persistency_preference). 
// - High 32 bits: idB (so contacts with the same idB group together) // - Low bits: contactType then persistency (so within a duplicate group, the preferred contact comes first) @@ -325,8 +631,10 @@ __global__ void setNullMappingForType(contactPairs_t* contactMapping, // prev_count: Number of contacts of this type in previous step __global__ void buildPatchContactMappingForType(bodyID_t* curr_idPatchA, bodyID_t* curr_idPatchB, + bodyID_t* curr_patchIsland, bodyID_t* prev_idPatchA, bodyID_t* prev_idPatchB, + bodyID_t* prev_patchIsland, contactPairs_t* contactMapping, contactPairs_t curr_start, contactPairs_t curr_count, @@ -339,6 +647,7 @@ __global__ void buildPatchContactMappingForType(bodyID_t* curr_idPatchA, bodyID_t curr_A = curr_idPatchA[curr_idx]; bodyID_t curr_B = curr_idPatchB[curr_idx]; + bodyID_t curr_L = curr_patchIsland[curr_idx]; // Default: no match found contactPairs_t my_partner = NULL_MAPPING_PARTNER; @@ -353,8 +662,9 @@ __global__ void buildPatchContactMappingForType(bodyID_t* curr_idPatchA, bodyID_t prev_A = prev_idPatchA[prev_idx]; bodyID_t prev_B = prev_idPatchB[prev_idx]; - // Compare (A, B) pairs lexicographically - if (prev_A < curr_A || (prev_A == curr_A && prev_B < curr_B)) { + // Compare (A, B, label) lexicographically + if (prev_A < curr_A || (prev_A == curr_A && (prev_B < curr_B || + (prev_B == curr_B && prev_patchIsland[prev_idx] < curr_L)))) { left = mid + 1; } else { right = mid; @@ -366,7 +676,8 @@ __global__ void buildPatchContactMappingForType(bodyID_t* curr_idPatchA, contactPairs_t prev_idx = prev_start + left; bodyID_t prev_A = prev_idPatchA[prev_idx]; bodyID_t prev_B = prev_idPatchB[prev_idx]; - if (prev_A == curr_A && prev_B == curr_B) { + bodyID_t prev_L = prev_patchIsland[prev_idx]; + if (prev_A == curr_A && prev_B == curr_B && prev_L == curr_L) { my_partner = prev_idx; } } @@ -380,9 +691,11 @@ __global__ void buildPatchContactMappingForType(bodyID_t* curr_idPatchA, // For each current contact, we use binary search to find 
the matching contact in the previous array. __global__ void buildPatchContactMapping(bodyID_t* curr_idPatchA, bodyID_t* curr_idPatchB, + bodyID_t* curr_patchIsland, contact_t* curr_contactTypePatch, bodyID_t* prev_idPatchA, bodyID_t* prev_idPatchB, + bodyID_t* prev_patchIsland, contact_t* previous_contactTypePatch, contactPairs_t* contactMapping, size_t numCurrContacts, @@ -391,6 +704,7 @@ __global__ void buildPatchContactMapping(bodyID_t* curr_idPatchA, if (myID < numCurrContacts) { bodyID_t curr_A = curr_idPatchA[myID]; bodyID_t curr_B = curr_idPatchB[myID]; + bodyID_t curr_L = curr_patchIsland[myID]; contact_t curr_type = curr_contactTypePatch[myID]; // Default: no match found @@ -426,19 +740,19 @@ __global__ void buildPatchContactMapping(bodyID_t* curr_idPatchA, } size_t type_end = left; - // Within this type segment, use binary search to find the matching A/B pair - // The segment is sorted by the combined patch ID pair (A in high bits, B in low bits) - // The encoding ensures that (smaller_A, larger_B) pattern creates a sortable value + // Within this type segment, use binary search to find the matching A/B/label triple + // The segment is sorted by patch pair then island label. 
left = type_start; right = type_end; while (left < right) { size_t mid = left + (right - left) / 2; bodyID_t prev_A = prev_idPatchA[mid]; bodyID_t prev_B = prev_idPatchB[mid]; + bodyID_t prev_L = prev_patchIsland[mid]; // Compare (A, B) pairs lexicographically // Since they're sorted by patch ID pair where smaller ID is in high bits - if (prev_A < curr_A || (prev_A == curr_A && prev_B < curr_B)) { + if (prev_A < curr_A || (prev_A == curr_A && (prev_B < curr_B || (prev_B == curr_B && prev_L < curr_L)))) { left = mid + 1; } else { right = mid; @@ -449,7 +763,8 @@ __global__ void buildPatchContactMapping(bodyID_t* curr_idPatchA, if (left < type_end) { bodyID_t prev_A = prev_idPatchA[left]; bodyID_t prev_B = prev_idPatchB[left]; - if (prev_A == curr_A && prev_B == curr_B) { + bodyID_t prev_L = prev_patchIsland[left]; + if (prev_A == curr_A && prev_B == curr_B && prev_L == curr_L) { my_partner = left; } } diff --git a/src/algorithms/DEMStaticDeviceSubroutines.h b/src/algorithms/DEMStaticDeviceSubroutines.h index 87cf6685..252930b3 100644 --- a/src/algorithms/DEMStaticDeviceSubroutines.h +++ b/src/algorithms/DEMStaticDeviceSubroutines.h @@ -128,6 +128,8 @@ void contactDetection(std::shared_ptr& bin_sphere_kern DualArray& previous_idPatchB, DualArray& contactTypePatch, DualArray& previous_contactTypePatch, + DualArray& contactPatchIsland, + DualArray& previous_contactPatchIsland, ContactTypeMap>& typeStartCountPatchMap, DualArray& geomToPatchMap, cudaStream_t& this_stream, @@ -149,6 +151,7 @@ void overwritePrevContactArrays(DualStruct& kT_data, DualArray& previous_idPatchA, DualArray& previous_idPatchB, DualArray& previous_contactTypePatch, + DualArray& previous_contactPatchIsland, ContactTypeMap>& typeStartCountPatchMap, DualStruct& simParams, DEMSolverScratchData& scratchPad, diff --git a/src/demo/DEMdemo_DrumCubes.cpp b/src/demo/DEMdemo_DrumCubes.cpp index d47d05bf..a9a4faf1 100644 --- a/src/demo/DEMdemo_DrumCubes.cpp +++ b/src/demo/DEMdemo_DrumCubes.cpp @@ -43,6 +43,8 
@@ int main() { // Load cube mesh template (12 triangles) and scale to 10 mm auto cube_template = DEMSim.LoadMeshType((GET_DATA_PATH() / "mesh/cube.obj").string(), mat_type_cube, true, false); cube_template->Scale(cube_size); + cube_template->SetConvex(true); + cube_template->SetNeverWinner(true); // Drum definition float3 CylCenter = make_float3(0, 0, 0); diff --git a/src/demo/DEMdemo_ResponseAngleMesh.cpp b/src/demo/DEMdemo_ResponseAngleMesh.cpp index f5064ea8..85370fd9 100644 --- a/src/demo/DEMdemo_ResponseAngleMesh.cpp +++ b/src/demo/DEMdemo_ResponseAngleMesh.cpp @@ -94,8 +94,8 @@ int main() { // --------------------- Particle settings block --------------------- // Mesh file can be .stl or .obj (path is relative to data/mesh). - const path particle_mesh_file = GET_DATA_PATH() / "mesh" / "cube.obj"; // "simpleTriangleShape4mm.stl" - const float particle_mesh_scale = mm_to_m * 5.0f; // 1.0f for STLs in mm size + const path particle_mesh_file = GET_DATA_PATH() / "mesh" / "cross_fine.stl"; // "simpleTriangleShape4mm.stl" + const float particle_mesh_scale = mm_to_m * 0.5f; // 1.0f for STLs in mm size const unsigned int target_particles = 5000; // ------------------------------------------------------------------- @@ -141,11 +141,32 @@ int main() { auto cap_tracker = DEMSim.Track(end_caps); // Sample particles inside the cylindrical volume with a small wall clearance. - const float sample_radius = drum_inner_radius - wall_clearance - tri_radius; - const float sample_halfheight = drum_height / 2.0f - wall_clearance - tri_radius; + const float r_sphere = tri_radius; // = 0.5 * tri_diag + // AABB clearance for a cylinder aligned with z: + // radial clearance uses the half-diagonal in XY; z-clearance uses half-height in Z. + const float r_xy_aabb = 0.5f * std::sqrt(tri_dims.x * tri_dims.x + tri_dims.y * tri_dims.y); + const float r_z_aabb = 0.5f * tri_dims.z; + // Spacing of the HCP lattice (center-to-center). Keep conservative spacing (uses tri_diag). 
+ // Clearance model only changes usable container dimensions. HCPSampler sampler(tri_diag * 1.01f); - auto candidate_pos = - sampler.SampleCylinderZ(make_float3(0, 0, drum_height / 2.0f), sample_radius, sample_halfheight); + auto sample_with_clearance = [&](float r_xy, float r_z) { + const float sample_radius = drum_inner_radius - wall_clearance - r_xy; + const float sample_halfheight = drum_height * 0.5f - wall_clearance - r_z; + // Guard against negative dimensions + if (sample_radius <= 0.f || sample_halfheight <= 0.f) { + return std::vector{}; + } + + return sampler.SampleCylinderZ(make_float3(0, 0, drum_height / 2.0f), sample_radius, sample_halfheight); + }; + // Generate both candidate sets + auto cand_sphere = sample_with_clearance(r_sphere, r_sphere); + auto cand_aabb = sample_with_clearance(r_xy_aabb, r_z_aabb); + // Pick denser (more points). If equal, prefer sphere for robustness. + bool use_aabb = cand_aabb.size() > cand_sphere.size(); + auto& candidate_pos = use_aabb ? cand_aabb : cand_sphere; + std::cout << "Sampling clearance mode: " << (use_aabb ? "AABB" : "Sphere") + << " (AABB=" << cand_aabb.size() << ", Sphere=" << cand_sphere.size() << ")\n"; if (candidate_pos.size() < target_particles) { DEME_WARNING("Sampler produced fewer points (%zu) than requested (%u). 
Using all generated points.", candidate_pos.size(), target_particles); From 351a8316c8695bbe55154e9f7aa4fddff4a2160a Mon Sep 17 00:00:00 2001 From: Florian Reinle Date: Sat, 31 Jan 2026 14:46:56 +0100 Subject: [PATCH 17/17] Cleanup and expanded SimpleCollision demo - clean up mesh splitting section including test - added full fast path for convex shapes that never win (contact island always more coarse on their side) - SimpleCollision demo expanded --- src/DEM/API.h | 3 + src/DEM/APIPrivate.cpp | 81 +- src/DEM/APIPublic.cpp | 5 +- src/DEM/Defines.h | 4 + src/DEM/MeshUtils.cpp | 851 +----------------- src/DEM/dT.cpp | 45 +- src/DEM/dT.h | 20 +- src/DEM/kT.cpp | 44 +- src/DEM/kT.h | 8 +- src/DEM/utils/HostSideHelpers.hpp | 73 ++ src/algorithms/DEMContactDetection.cu | 1 + src/algorithms/DEMContactDetectionKernels.cuh | 12 +- src/demo/DEMdemo_ResponseAngleMesh.cpp | 6 +- src/demo/ModularTests/CMakeLists.txt | 1 - src/demo/ModularTests/DEMTest_MeshPatch.cpp | 271 ------ .../ModularTests/DEMTest_SimpleCollisions.cpp | 257 ++++-- 16 files changed, 367 insertions(+), 1315 deletions(-) delete mode 100644 src/demo/ModularTests/DEMTest_MeshPatch.cpp diff --git a/src/DEM/API.h b/src/DEM/API.h index 3ba4c82e..22a0376d 100644 --- a/src/DEM/API.h +++ b/src/DEM/API.h @@ -1750,6 +1750,8 @@ class DEMSolver { size_t nSpheresGM = 0; // Total number of triangle facets size_t nTriGM = 0; + // Total number of triangles that need neighbor info (compact neighbor array size) + size_t nTriNeighbors = 0; // Total number of mesh patches size_t nMeshPatches = 0; // Number of analytical entites (as components of some external objects) @@ -2080,6 +2082,7 @@ class DEMSolver { size_t nSpheres, size_t nTriMesh, size_t nFacets, + size_t nTriNeighbors, size_t nMeshPatches, unsigned int nExtObj_old, unsigned int nAnalGM_old); diff --git a/src/DEM/APIPrivate.cpp b/src/DEM/APIPrivate.cpp index 531c860f..8e7be66e 100644 --- a/src/DEM/APIPrivate.cpp +++ b/src/DEM/APIPrivate.cpp @@ -31,32 +31,12 @@ struct
EdgeInfo { int edge = 0; }; -struct QuantKey3 { - int64_t x, y, z; - bool operator==(const QuantKey3& o) const noexcept { return x == o.x && y == o.y && z == o.z; } -}; -struct QuantKey3Hash { - size_t operator()(const QuantKey3& k) const noexcept { - size_t h1 = std::hash{}(k.x); - size_t h2 = std::hash{}(k.y); - size_t h3 = std::hash{}(k.z); - size_t h = h1; - h ^= h2 + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); - h ^= h3 + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); - return h; - } -}; - inline uint64_t makeEdgeKey(int a, int b) { const uint32_t lo = static_cast(std::min(a, b)); const uint32_t hi = static_cast(std::max(a, b)); return (static_cast(lo) << 32) | static_cast(hi); } -static inline int64_t quantize(double v, double eps) { - return static_cast(std::llround(v / eps)); -} - std::vector> buildTriangleEdgeNeighbors(const std::vector& face_v_indices, const std::vector& vertices) { const size_t n_faces = face_v_indices.size(); @@ -67,36 +47,8 @@ std::vector> buildTriangleEdgeNeighbors(const std::vecto std::vector canon; if (!vertices.empty()) { - double minx = vertices[0].x, miny = vertices[0].y, minz = vertices[0].z; - double maxx = minx, maxy = miny, maxz = minz; - for (const auto& v : vertices) { - minx = std::min(minx, (double)v.x); - miny = std::min(miny, (double)v.y); - minz = std::min(minz, (double)v.z); - maxx = std::max(maxx, (double)v.x); - maxy = std::max(maxy, (double)v.y); - maxz = std::max(maxz, (double)v.z); - } - const double dx = maxx - minx, dy = maxy - miny, dz = maxz - minz; - const double diag = std::sqrt(dx * dx + dy * dy + dz * dz); - const double eps = std::max(diag * 1e-9, 1e-12); - - std::unordered_map rep; - rep.reserve(vertices.size()); - canon.assign(vertices.size(), static_cast(-1)); - size_t next_id = 0; - for (size_t i = 0; i < vertices.size(); ++i) { - const auto& v = vertices[i]; - QuantKey3 key{quantize(v.x, eps), quantize(v.y, eps), quantize(v.z, eps)}; - auto it = rep.find(key); - if (it == rep.end()) { - 
rep.emplace(key, next_id); - canon[i] = next_id; - next_id++; - } else { - canon[i] = it->second; - } - } + const double eps = computeVertexQuantEps(vertices); + canon = buildCanonicalVertexMap(vertices, eps); } std::unordered_map> edge_map; @@ -973,7 +925,12 @@ void DEMSolver::preprocessTriangleObjs() { m_mesh_facet_owner.insert(m_mesh_facet_owner.end(), mesh_obj->GetNumTriangles(), thisMeshObj); const bodyID_t tri_offset = static_cast(m_mesh_facets.size()); - const auto local_neighbors = buildTriangleEdgeNeighbors(mesh_obj->m_face_v_indices, mesh_obj->m_vertices); + std::vector> local_neighbors; + if (mesh_obj->IsConvex() && mesh_obj->IsNeverWinner()) { + local_neighbors.assign(mesh_obj->GetNumTriangles(), {NULL_BODYID, NULL_BODYID, NULL_BODYID}); + } else { + local_neighbors = buildTriangleEdgeNeighbors(mesh_obj->m_face_v_indices, mesh_obj->m_vertices); + } // Force single-patch semantics: one patch per mesh (all facets in patch 0) if (mesh_obj->patches_explicitly_set || mesh_obj->GetNumPatches() > 1) { @@ -1469,17 +1426,30 @@ void DEMSolver::setSimParams() { } void DEMSolver::allocateGPUArrays() { + size_t tri_neighbors = 0; + for (const auto& mesh_obj : cached_mesh_objs) { + if (!mesh_obj) { + continue; + } + if (!(mesh_obj->IsConvex() && mesh_obj->IsNeverWinner())) { + tri_neighbors += mesh_obj->GetNumTriangles(); + } + } + nTriNeighbors = tri_neighbors; + // Resize arrays based on the statistical data we have std::thread dThread = std::move(std::thread([this]() { this->dT->allocateGPUArrays(this->nOwnerBodies, this->nOwnerClumps, this->nExtObj, this->nTriMeshes, - this->nSpheresGM, this->nTriGM, this->nMeshPatches, this->nAnalGM, + this->nSpheresGM, this->nTriGM, this->nTriNeighbors, this->nMeshPatches, + this->nAnalGM, this->nExtraContacts, this->nDistinctMassProperties, this->nDistinctClumpBodyTopologies, this->nDistinctClumpComponents, this->nJitifiableClumpComponents, this->nMatTuples); })); std::thread kThread = std::move(std::thread([this]() { 
this->kT->allocateGPUArrays(this->nOwnerBodies, this->nOwnerClumps, this->nExtObj, this->nTriMeshes, - this->nSpheresGM, this->nTriGM, this->nAnalGM, this->nExtraContacts, + this->nSpheresGM, this->nTriGM, this->nTriNeighbors, this->nAnalGM, + this->nExtraContacts, this->nDistinctMassProperties, this->nDistinctClumpBodyTopologies, this->nDistinctClumpComponents, this->nJitifiableClumpComponents, this->nMatTuples); })); @@ -1542,6 +1512,7 @@ void DEMSolver::updateClumpMeshArrays(size_t nOwners, size_t nSpheres, size_t nTriMesh, size_t nFacets, + size_t nTriNeighbors, size_t nMeshPatches, unsigned int nExtObj, unsigned int nAnalGM) { @@ -1572,7 +1543,7 @@ void DEMSolver::updateClumpMeshArrays(size_t nOwners, // I/O and misc. m_no_output_families, m_tracked_objs, // Number of entities, old - nOwners, nClumps, nSpheres, nTriMesh, nFacets, nMeshPatches, nExtObj, nAnalGM); + nOwners, nClumps, nSpheres, nTriMesh, nFacets, nTriNeighbors, nMeshPatches, nExtObj, nAnalGM); kT->updateClumpMeshArrays( // Clump batchs' initial stats cached_input_clump_batches, @@ -1588,7 +1559,7 @@ void DEMSolver::updateClumpMeshArrays(size_t nOwners, // Templates and misc. 
flattened_clump_templates, // Number of entities, old - nOwners, nClumps, nSpheres, nTriMesh, nFacets, nMeshPatches, nExtObj, nAnalGM); + nOwners, nClumps, nSpheres, nTriMesh, nFacets, nTriNeighbors, nMeshPatches, nExtObj, nAnalGM); } void DEMSolver::packDataPointers() { diff --git a/src/DEM/APIPublic.cpp b/src/DEM/APIPublic.cpp index e79e8ee0..b572acca 100644 --- a/src/DEM/APIPublic.cpp +++ b/src/DEM/APIPublic.cpp @@ -2524,6 +2524,7 @@ void DEMSolver::Update() { size_t nSpheres_old = nSpheresGM; size_t nTriMesh_old = nTriMeshes; size_t nFacets_old = nTriGM; + size_t nTriNeighbors_old = nTriNeighbors; size_t nPatch_old = nMeshPatches; unsigned int nAnalGM_old = nAnalGM; unsigned int nExtObj_old = nExtObj; @@ -2534,8 +2535,8 @@ void DEMSolver::Update() { updateTotalEntityNum(); allocateGPUArrays(); // `Update' method needs to know the number of existing clumps and spheres (before this addition) - updateClumpMeshArrays(nOwners_old, nClumps_old, nSpheres_old, nTriMesh_old, nFacets_old, nPatch_old, nExtObj_old, - nAnalGM_old); + updateClumpMeshArrays(nOwners_old, nClumps_old, nSpheres_old, nTriMesh_old, nFacets_old, nTriNeighbors_old, + nPatch_old, nExtObj_old, nAnalGM_old); packDataPointers(); // Now that all params prepared, and all data pointers packed on host side, we need to migrate that imformation to diff --git a/src/DEM/Defines.h b/src/DEM/Defines.h index a9c7f799..462a05f1 100644 --- a/src/DEM/Defines.h +++ b/src/DEM/Defines.h @@ -392,6 +392,8 @@ struct DEMDataDT { notStupidBool_t* ownerMeshConvex; notStupidBool_t* ownerMeshNeverWinner; bodyID_t* triPatchID; + // Map global triangle ID -> compact neighbor index (NULL_BODYID if neighbors are not stored) + bodyID_t* triNeighborIndex; bodyID_t* triNeighbor1; bodyID_t* triNeighbor2; bodyID_t* triNeighbor3; @@ -473,6 +475,8 @@ struct DEMDataKT { notStupidBool_t* ownerMeshConvex; notStupidBool_t* ownerMeshNeverWinner; bodyID_t* triPatchID; + // Map global triangle ID -> compact neighbor index (NULL_BODYID if 
neighbors are not stored) + bodyID_t* triNeighborIndex; bodyID_t* triNeighbor1; bodyID_t* triNeighbor2; bodyID_t* triNeighbor3; diff --git a/src/DEM/MeshUtils.cpp b/src/DEM/MeshUtils.cpp index daee8c4d..12ad108f 100644 --- a/src/DEM/MeshUtils.cpp +++ b/src/DEM/MeshUtils.cpp @@ -795,804 +795,6 @@ static std::vector> buildAdjacencyWithEdgeInfo(const st return adj; } -// ------------------------------------------------------------ -// Smart patch splitter -// ------------------------------------------------------------ -unsigned int DEMMesh::SplitIntoConvexPatches(float hard_angle_deg, - const PatchSplitOptions& opt_in, - PatchQualityReport* out_report, - const PatchQualityOptions& qopt) { - if (nTri == 0) { - patches_explicitly_set = false; - nPatches = 1; - if (out_report) { - out_report->overall = PatchQualityLevel::SAFE; - out_report->constraint_status = PatchConstraintStatus::SATISFIED; - out_report->achieved_patches = 1; - out_report->requested_min = 1; - out_report->requested_max = 1; - out_report->per_patch.clear(); - } - return 0; - } - - if (hard_angle_deg <= 0.0f) { - DEME_ERROR("SplitIntoConvexPatches: hard_angle_deg must be > 0."); - } - if (opt_in.patch_min == 0) { - DEME_ERROR("SplitIntoConvexPatches: patch_min must be >= 1."); - } - if (opt_in.patch_min > opt_in.patch_max) { - DEME_ERROR("SplitIntoConvexPatches: patch_min cannot be > patch_max."); - } - - // Copy options (we may adjust defaults in a controlled way) - PatchSplitOptions opt = opt_in; - - hard_angle_deg = std::min(180.0f, std::max(0.0f, hard_angle_deg)); - - // Resolve hysteresis - float soft_angle_deg = (opt.soft_angle_deg >= 0.0f) ? opt.soft_angle_deg : hard_angle_deg; - soft_angle_deg = std::min(hard_angle_deg, std::max(0.0f, soft_angle_deg)); - - // If user activates hysteresis (soft < hard) but didn't enable patch-normal gating, set a sensible default: - // otherwise the mid-band has no extra decision signal. 
- bool patch_gate_enabled = (opt.patch_normal_max_deg >= 0.0f); - if (!patch_gate_enabled && soft_angle_deg < hard_angle_deg) { - opt.patch_normal_max_deg = soft_angle_deg; - patch_gate_enabled = true; - } - - float patch_normal_max_deg = opt.patch_normal_max_deg; // may be <0 => disabled - if (patch_gate_enabled) { - patch_normal_max_deg = std::min(180.0f, std::max(0.0f, patch_normal_max_deg)); - } - - const float cos_hard = std::cos(deg2rad(hard_angle_deg)); - const float cos_soft = std::cos(deg2rad(soft_angle_deg)); - float cos_patch = -1.0f; - if (patch_gate_enabled) { - cos_patch = std::cos(deg2rad(patch_normal_max_deg)); - } - - // Precompute face normals and areas - std::vector face_normals(nTri); - std::vector face_areas(nTri, 0.0f); - for (size_t i = 0; i < nTri; ++i) { - const int3& f = m_face_v_indices[i]; - const float3& v0 = m_vertices[f.x]; - const float3& v1 = m_vertices[f.y]; - const float3& v2 = m_vertices[f.z]; - face_normals[i] = computeFaceNormal(v0, v1, v2); - face_areas[i] = computeTriangleArea(v0, v1, v2); - if (face_areas[i] <= DEME_TINY_FLOAT) - face_areas[i] = 0.0f; - } - - // Adjacency with edge info - auto adjacency = buildAdjacencyWithEdgeInfo(m_face_v_indices); - - // Seed order - std::vector seeds(nTri); - for (size_t i = 0; i < nTri; ++i) - seeds[i] = i; - if (opt.seed_largest_first) { - std::stable_sort(seeds.begin(), seeds.end(), [&](size_t a, size_t b) { return face_areas[a] > face_areas[b]; }); - } - - // Core segmentation routine (no post-merge/split) - auto segment_once = [&](const PatchSplitOptions& o, - float soft_deg, - bool patch_gate, - float cosPatch, - std::vector& out_ids, - unsigned int& out_nP) { - out_ids.assign(nTri, (patchID_t)-1); - - int current_patch_id = 0; - std::vector queue; - queue.reserve(256); - - for (size_t si = 0; si < nTri; ++si) { - size_t seed = seeds[si]; - if (out_ids[seed] != (patchID_t)-1) - continue; - - if (current_patch_id > std::numeric_limits::max()) { - DEME_ERROR("SplitIntoPatches: too 
many patches for patchID_t."); - } - - float3 sumN = mul3(face_normals[seed], face_areas[seed]); - float sumA = face_areas[seed]; - float3 patchN = normalize3(sumN); - - queue.clear(); - queue.push_back(seed); - out_ids[seed] = (patchID_t)current_patch_id; - - size_t qi = 0; - while (qi < queue.size()) { - size_t cur = queue[qi++]; - - for (const auto& e : adjacency[cur]) { - size_t nb = e.nbr; - if (out_ids[nb] != (patchID_t)-1) - continue; - - const float3& n_cur = face_normals[cur]; - const float3& n_nb = face_normals[nb]; - - // Hard barrier (mandatory) - float d_cn = clamp11(dot3(n_cur, n_nb)); - if (d_cn < cos_hard) - continue; - - // Optional concavity barrier - if (o.block_concave_edges && e.oriented_ok) { - const float3& vA = m_vertices[e.va]; - const float3& vB = m_vertices[e.vb]; - float dih = signedDihedralDeg(n_cur, n_nb, vA, vB); - if (dih < -o.concave_allow_deg) - continue; - } - - // Hysteresis band: - // - if below soft: we still require patch gate if enabled (otherwise accept) - // - if between soft and hard: require patch gate if enabled; otherwise accept (legacy-like) - bool in_soft = (d_cn >= cos_soft); - - if (patch_gate) { - float d_pn = clamp11(dot3(patchN, n_nb)); - if (d_pn < cosPatch) - continue; - // pass patch gate => accept - } else { - // no patch gate => legacy-like behavior (soft only matters if patch gate is active) - (void)in_soft; - } - - out_ids[nb] = (patchID_t)current_patch_id; - queue.push_back(nb); - - if (face_areas[nb] > 0.0f) { - sumN = add3(sumN, mul3(n_nb, face_areas[nb])); - sumA += face_areas[nb]; - patchN = normalize3(sumN); - } - } - } - - current_patch_id++; - } - - out_nP = (unsigned int)current_patch_id; - }; - - // A small helper to compress patch IDs to [0..nP-1] - auto compress_ids = [&](std::vector& ids, unsigned int& out_nP) { - auto res = rank_transform(ids); - ids = std::move(res.first); - // recompute nP - patchID_t mx = 0; - for (auto v : ids) - if (v > mx) mx = v; - out_nP = (unsigned int)(mx + 1); - }; 
- - // Enforce patch_max by merging adjacent patches where allowed (hard/concave respected) - auto enforce_patch_max = [&](std::vector& ids, unsigned int& pcount, PatchConstraintStatus& cstat) { - if (pcount <= opt.patch_max) - return; - - // Build patch mean normals (area-weighted) - std::vector pSumN(pcount, make_float3(0, 0, 0)); - std::vector pSumA(pcount, 0.0f); - - for (size_t t = 0; t < nTri; ++t) { - int p = (int)ids[t]; - if (face_areas[t] > 0.0f) { - pSumN[p] = add3(pSumN[p], mul3(face_normals[t], face_areas[t])); - pSumA[p] += face_areas[t]; - } - } - - struct DSU { - std::vector parent, rnk; - std::vector* sumN; - std::vector* sumA; - - DSU(int n, std::vector& sN, std::vector& sA) : parent(n), rnk(n, 0), sumN(&sN), sumA(&sA) { - for (int i = 0; i < n; ++i) parent[i] = i; - } - int find(int x) { - while (parent[x] != x) { - parent[x] = parent[parent[x]]; - x = parent[x]; - } - return x; - } - bool unite(int a, int b) { - a = find(a); b = find(b); - if (a == b) return false; - if (rnk[a] < rnk[b]) std::swap(a, b); - parent[b] = a; - if (rnk[a] == rnk[b]) rnk[a]++; - (*sumN)[a] = add3((*sumN)[a], (*sumN)[b]); - (*sumA)[a] += (*sumA)[b]; - return true; - } - float3 patchN(int x) { - x = find(x); - return normalize3((*sumN)[x]); - } - }; - - DSU dsu((int)pcount, pSumN, pSumA); - - struct Cand { float cost; int a; int b; }; - struct Cmp { bool operator()(const Cand& x, const Cand& y) const { return x.cost > y.cost; } }; - - auto cost_between = [&](int a, int b) { - float3 na = dsu.patchN(a); - float3 nb = dsu.patchN(b); - float d = clamp11(dot3(na, nb)); - return 1.0f - d; // smaller is better (more parallel) - }; - - // Candidate patch adjacency across mergeable edges (hard + optional concavity) - std::map, float> best_cost; - - for (size_t t = 0; t < nTri; ++t) { - int pt = (int)ids[t]; - for (const auto& e : adjacency[t]) { - size_t nb = e.nbr; - int pn = (int)ids[nb]; - if (pt == pn) - continue; - - float d = clamp11(dot3(face_normals[t], 
face_normals[nb])); - if (d < cos_hard) - continue; - - if (opt.block_concave_edges && e.oriented_ok) { - const float3& vA = m_vertices[e.va]; - const float3& vB = m_vertices[e.vb]; - float dih = signedDihedralDeg(face_normals[t], face_normals[nb], vA, vB); - if (dih < -opt.concave_allow_deg) - continue; - } - - int a = std::min(pt, pn); - int b = std::max(pt, pn); - float c = cost_between(a, b); - - auto key = std::make_pair(a, b); - auto it = best_cost.find(key); - if (it == best_cost.end() || c < it->second) - best_cost[key] = c; - } - } - - std::priority_queue, Cmp> pq; - for (const auto& kv : best_cost) - pq.push(Cand{kv.second, kv.first.first, kv.first.second}); - - unsigned int cur = pcount; - while (cur > opt.patch_max && !pq.empty()) { - auto c = pq.top(); pq.pop(); - int ra = dsu.find(c.a); - int rb = dsu.find(c.b); - if (ra == rb) - continue; - if (dsu.unite(ra, rb)) - cur--; - } - - // If we couldn't merge enough, mark as unmergeable - if (cur > opt.patch_max) - cstat = PatchConstraintStatus::TOO_MANY_UNMERGEABLE; - - // Write back merged ids and compress - std::unordered_map rep2new; - rep2new.reserve(pcount * 2); - - patchID_t next = 0; - for (size_t i = 0; i < nTri; ++i) { - int p = (int)ids[i]; - int r = dsu.find(p); - auto it = rep2new.find(r); - if (it == rep2new.end()) { - rep2new.emplace(r, next); - ids[i] = next; - next++; - } else { - ids[i] = it->second; - } - } - pcount = (unsigned int)next; - }; - - // Enforce patch_min by splitting worst-spread patches (count-only) - auto enforce_patch_min = [&](std::vector& ids, unsigned int& pcount, PatchConstraintStatus& cstat) { - if (pcount >= opt.patch_min) - return; - - auto rebuild_patch_lists = [&](std::vector>& pTris) { - pTris.assign(pcount, {}); - for (size_t i = 0; i < nTri; ++i) { - int p = (int)ids[i]; - pTris[p].push_back(i); - } - }; - - std::vector> pTris; - rebuild_patch_lists(pTris); - - auto patch_mean_normal = [&](int p) { - float3 sumN = make_float3(0, 0, 0); - float sumA = 0.0f; - 
for (size_t t : pTris[p]) { - if (face_areas[t] > 0.0f) { - sumN = add3(sumN, mul3(face_normals[t], face_areas[t])); - sumA += face_areas[t]; - } - } - (void)sumA; - return normalize3(sumN); - }; - - auto pick_patch_to_split = [&]() -> int { - float worst = 1.0f; - int worst_p = -1; - for (int p = 0; p < (int)pcount; ++p) { - if (pTris[p].size() < 2) - continue; - float3 pn = patch_mean_normal(p); - float minDot = 1.0f; - for (size_t t : pTris[p]) { - float d = clamp11(dot3(pn, face_normals[t])); - minDot = std::min(minDot, d); - } - if (minDot < worst) { - worst = minDot; - worst_p = p; - } - } - return worst_p; - }; - - struct Node { float cost; size_t tri; int label; }; - struct NodeCmp { bool operator()(const Node& a, const Node& b) const { return a.cost > b.cost; } }; - - std::vector label(nTri, -2); - std::vector touched; touched.reserve(2048); - - while (pcount < opt.patch_min) { - int p = pick_patch_to_split(); - if (p < 0) { - cstat = PatchConstraintStatus::TOO_FEW_UNSPLITTABLE; - break; - } - const auto& tris = pTris[p]; - if (tris.size() < 2) { - cstat = PatchConstraintStatus::TOO_FEW_UNSPLITTABLE; - break; - } - - // choose 2 seeds with farthest normals (2-sweep) - size_t t0 = tris[0]; - size_t sA = t0; - float best = 1.0f; - for (size_t t : tris) { - float d = clamp11(dot3(face_normals[t0], face_normals[t])); - if (d < best) { best = d; sA = t; } - } - size_t sB = sA; - best = 1.0f; - for (size_t t : tris) { - float d = clamp11(dot3(face_normals[sA], face_normals[t])); - if (d < best) { best = d; sB = t; } - } - if (sA == sB) { - cstat = PatchConstraintStatus::TOO_FEW_UNSPLITTABLE; - break; - } - - touched.clear(); - for (size_t t : tris) { - label[t] = -1; - touched.push_back(t); - } - - std::priority_queue, NodeCmp> pq; - label[sA] = 0; label[sB] = 1; - pq.push(Node{0.0f, sA, 0}); - pq.push(Node{0.0f, sB, 1}); - - const float3 seedN[2] = {face_normals[sA], face_normals[sB]}; - - while (!pq.empty()) { - Node cur = pq.top(); pq.pop(); - size_t t = 
cur.tri; - int lbl = cur.label; - if (label[t] != lbl) - continue; - - for (const auto& e : adjacency[t]) { - size_t nb = e.nbr; - if (label[nb] != -1) - continue; - - float d = clamp11(dot3(face_normals[t], face_normals[nb])); - if (d < cos_hard) - continue; - - float dn = clamp11(dot3(face_normals[nb], seedN[lbl])); - float cost = 1.0f - dn; - - label[nb] = (int8_t)lbl; - pq.push(Node{cost, nb, lbl}); - } - } - - size_t c0 = 0, c1 = 0; - for (size_t t : tris) { - if (label[t] == 0) c0++; - else if (label[t] == 1) c1++; - } - if (c0 == 0 || c1 == 0) { - for (size_t t : touched) label[t] = -2; - cstat = PatchConstraintStatus::TOO_FEW_UNSPLITTABLE; - break; - } - - patchID_t newP = (patchID_t)pcount; - pcount++; - - for (size_t t : tris) { - ids[t] = (label[t] == 1) ? newP : (patchID_t)p; - } - - for (size_t t : touched) label[t] = -2; - - // compress & rebuild - compress_ids(ids, pcount); - rebuild_patch_lists(pTris); - } - }; - - // Quality report computation - auto compute_report = [&](const std::vector& ids, - unsigned int pcount, - PatchConstraintStatus cstat, - PatchQualityReport& rep) { - rep.per_patch.assign(pcount, PatchQualityPatch{}); - rep.overall = PatchQualityLevel::SAFE; - rep.constraint_status = cstat; - rep.achieved_patches = pcount; - rep.requested_min = opt.patch_min; - rep.requested_max = opt.patch_max; - - std::vector> pTris(pcount); - for (size_t i = 0; i < nTri; ++i) { - int p = (int)ids[i]; - pTris[p].push_back(i); - } - - std::vector pSumN(pcount, make_float3(0, 0, 0)); - std::vector pSumA(pcount, 0.0f); - - for (int p = 0; p < (int)pcount; ++p) { - for (size_t t : pTris[p]) { - if (face_areas[t] > 0.0f) { - pSumN[p] = add3(pSumN[p], mul3(face_normals[t], face_areas[t])); - pSumA[p] += face_areas[t]; - } - } - } - - // reference angle for classification - float ref_angle_deg = patch_gate_enabled ? 
patch_normal_max_deg : hard_angle_deg; - - for (int p = 0; p < (int)pcount; ++p) { - PatchQualityPatch pq; - pq.n_tris = (unsigned int)pTris[p].size(); - - float3 meanN = normalize3(pSumN[p]); - float sumA = pSumA[p]; - float r = (sumA > DEME_TINY_FLOAT) ? (norm3(pSumN[p]) / sumA) : 0.0f; - pq.coherence_r = std::min(1.0f, std::max(0.0f, r)); - - float minDot = 1.0f; - for (size_t t : pTris[p]) { - float d = clamp11(dot3(meanN, face_normals[t])); - minDot = std::min(minDot, d); - } - pq.worst_angle_deg = rad2deg(std::acos(clamp11(minDot))); - - unsigned int hard_cross = 0; - unsigned int conc_cross = 0; - unsigned int unoriented = 0; - - for (size_t t : pTris[p]) { - for (const auto& e : adjacency[t]) { - size_t nb = e.nbr; - if ((int)ids[nb] != p) - continue; - - float d = clamp11(dot3(face_normals[t], face_normals[nb])); - if (d < cos_hard) - hard_cross++; - - if (opt.block_concave_edges) { - if (!e.oriented_ok) { - unoriented++; - } else { - const float3& vA = m_vertices[e.va]; - const float3& vB = m_vertices[e.vb]; - float dih = signedDihedralDeg(face_normals[t], face_normals[nb], vA, vB); - if (dih < -opt.concave_allow_deg) - conc_cross++; - } - } - } - } - - pq.hard_crossings = hard_cross / 2; - pq.concave_crossings = conc_cross / 2; - pq.unoriented_edges = unoriented / 2; - - PatchQualityLevel lvl = PatchQualityLevel::SAFE; - - if (qopt.hard_crossings_are_critical && pq.hard_crossings > 0) { - lvl = PatchQualityLevel::CRITICAL; - } - - if (lvl != PatchQualityLevel::CRITICAL) { - bool angle_ok = (pq.worst_angle_deg <= ref_angle_deg); - bool angle_warn = (pq.worst_angle_deg <= ref_angle_deg + qopt.warn_worst_angle_margin_deg); - - if (pq.coherence_r < qopt.warn_r || !angle_warn) { - lvl = PatchQualityLevel::CRITICAL; - } else if (pq.coherence_r < qopt.safe_r || !angle_ok) { - lvl = PatchQualityLevel::WARN; - } - } - - if (opt.block_concave_edges && pq.concave_crossings > 0) { - if (qopt.concave_crossings_are_critical) - lvl = PatchQualityLevel::CRITICAL; - else 
if (lvl == PatchQualityLevel::SAFE) - lvl = PatchQualityLevel::WARN; - } - - if (opt.block_concave_edges && pq.unoriented_edges >= qopt.unoriented_warn_threshold && lvl == PatchQualityLevel::SAFE) { - lvl = PatchQualityLevel::WARN; - } - - pq.level = lvl; - rep.per_patch[p] = pq; - - if ((int)lvl > (int)rep.overall) - rep.overall = lvl; - } - }; - - // ------------------------------------------------------------ - // Optional auto tuning (OFF unless opt.auto_tune.enabled == true) - // ------------------------------------------------------------ - auto run_full = [&](PatchSplitOptions run_opt, - std::vector& ids_out, - unsigned int& pcount_out, - PatchConstraintStatus& cstat_out, - PatchQualityReport* rep_out) { - cstat_out = PatchConstraintStatus::SATISFIED; - - float run_soft = (run_opt.soft_angle_deg >= 0.0f) ? run_opt.soft_angle_deg : hard_angle_deg; - run_soft = std::min(hard_angle_deg, std::max(0.0f, run_soft)); - - bool run_patch_gate = (run_opt.patch_normal_max_deg >= 0.0f); - if (!run_patch_gate && run_soft < hard_angle_deg) { - run_opt.patch_normal_max_deg = run_soft; - run_patch_gate = true; - } - - float run_cos_patch = -1.0f; - if (run_patch_gate) { - float run_patch_deg = std::min(180.0f, std::max(0.0f, run_opt.patch_normal_max_deg)); - run_cos_patch = std::cos(deg2rad(run_patch_deg)); - } - - // segment - segment_once(run_opt, run_soft, run_patch_gate, run_cos_patch, ids_out, pcount_out); - compress_ids(ids_out, pcount_out); - - // enforce max, then min (count-only) - enforce_patch_max(ids_out, pcount_out, cstat_out); - enforce_patch_min(ids_out, pcount_out, cstat_out); - - // final compress - compress_ids(ids_out, pcount_out); - - if (rep_out) { - PatchQualityReport tmp; - // Update globals for report reference (patch_gate_enabled etc.) are based on outer opt; - // for report classification, we reuse "current" (outer) patch_gate_enabled and patch_normal_max_deg. - // For best accuracy you can compute ref_angle from run_opt as well; keep simple here. 
- compute_report(ids_out, pcount_out, cstat_out, tmp); - *rep_out = std::move(tmp); - } - }; - - std::vector best_ids; - unsigned int best_pcount = 0; - PatchConstraintStatus best_cstat = PatchConstraintStatus::SATISFIED; - PatchQualityReport best_rep; - - if (!opt.auto_tune.enabled) { - run_full(opt, best_ids, best_pcount, best_cstat, out_report ? &best_rep : nullptr); - } else { - // Auto-tuning is conservative: it will not run if you hard-fix the count (patch_min == patch_max), - // because then your intention is explicit ("keep the cube a cube"). - if (opt.patch_min == opt.patch_max) { - run_full(opt, best_ids, best_pcount, best_cstat, out_report ? &best_rep : nullptr); - } else { - // Start from user options; search by tightening/loosening patch_normal_max_deg (and soft if present) - PatchSplitOptions cur = opt; - - auto severity_score = [&](PatchQualityLevel lvl) { return (int)lvl; }; - - bool have_best = false; - - for (unsigned int it = 0; it < opt.auto_tune.max_iters; ++it) { - std::vector ids; - unsigned int pc = 0; - PatchConstraintStatus cs = PatchConstraintStatus::SATISFIED; - PatchQualityReport rep; - - run_full(cur, ids, pc, cs, &rep); - - // candidate score: prioritize meeting constraints, then quality, then fewer patches - bool constraints_ok = (cs == PatchConstraintStatus::SATISFIED); - int sev = severity_score(rep.overall); - - auto better_than = [&](bool ok, int s, unsigned int p) { - if (!have_best) return true; - bool best_ok = (best_cstat == PatchConstraintStatus::SATISFIED); - int best_sev = severity_score(best_rep.overall); - if (ok != best_ok) return ok; // prefer satisfied - if (s != best_sev) return s < best_sev; // prefer safer - return p < best_pcount; // prefer fewer patches - }; - - if (better_than(constraints_ok, sev, pc)) { - best_ids = std::move(ids); - best_pcount = pc; - best_cstat = cs; - best_rep = std::move(rep); - have_best = true; - } - - // stop if good enough - if (constraints_ok && (int)best_rep.overall <= 
(int)opt.auto_tune.target_level) - break; - - // Adjust rules: - // - If CRITICAL and we can afford more patches => tighten (smaller patch_normal_max, smaller soft) - // - If too many unmergeable patches => loosen (bigger patch_normal_max, bigger soft, disable concavity if needed) - // - If too few patches => tighten - if (cs == PatchConstraintStatus::TOO_MANY_UNMERGEABLE) { - // loosen - if (cur.patch_normal_max_deg >= 0.0f) - cur.patch_normal_max_deg = std::min(180.0f, cur.patch_normal_max_deg + opt.auto_tune.step_deg); - if (cur.soft_angle_deg >= 0.0f) - cur.soft_angle_deg = std::min(hard_angle_deg, cur.soft_angle_deg + opt.auto_tune.step_deg); - if (cur.block_concave_edges && opt.auto_tune.allow_enable_concavity) { - // concavity block can prevent merging; relax it - cur.block_concave_edges = false; - } - } else if (pc < opt.patch_min || rep.overall == PatchQualityLevel::CRITICAL) { - // tighten if possible - if (cur.patch_normal_max_deg < 0.0f) - cur.patch_normal_max_deg = std::min(hard_angle_deg, 45.0f); // enable with a sane default - else - cur.patch_normal_max_deg = std::max(0.0f, cur.patch_normal_max_deg - opt.auto_tune.step_deg); - - if (cur.soft_angle_deg >= 0.0f) - cur.soft_angle_deg = std::max(0.0f, cur.soft_angle_deg - opt.auto_tune.step_deg); - - if (!cur.block_concave_edges && opt.auto_tune.allow_enable_concavity) { - cur.block_concave_edges = true; - cur.concave_allow_deg = std::max(0.0f, cur.concave_allow_deg); - } - } else if (pc > opt.patch_max) { - // loosen (but note: enforce_patch_max already tries) - if (cur.patch_normal_max_deg >= 0.0f) - cur.patch_normal_max_deg = std::min(180.0f, cur.patch_normal_max_deg + opt.auto_tune.step_deg); - if (cur.soft_angle_deg >= 0.0f) - cur.soft_angle_deg = std::min(hard_angle_deg, cur.soft_angle_deg + opt.auto_tune.step_deg); - } else { - // stable but not good enough; slightly tighten coherence if we have headroom under patch_max - if (pc < opt.patch_max) { - if (cur.patch_normal_max_deg < 0.0f) - 
cur.patch_normal_max_deg = std::min(hard_angle_deg, 45.0f); - else - cur.patch_normal_max_deg = std::max(0.0f, cur.patch_normal_max_deg - opt.auto_tune.step_deg); - } else { - break; - } - } - } - - // If never found, fall back - if (!have_best) { - run_full(opt, best_ids, best_pcount, best_cstat, out_report ? &best_rep : nullptr); - } - } - } - - // Commit to mesh state - m_patch_ids = std::move(best_ids); - nPatches = best_pcount; - patches_explicitly_set = true; - - // Feedback output - if (out_report) { - *out_report = std::move(best_rep); - } - - // Material broadcasting (same as existing behavior) - if (isMaterialSet && materials.size() == 1) { - materials = std::vector>(nPatches, materials[0]); - } - if (isMaterialSet && materials.size() != nPatches) { - DEME_ERROR( - "The number of materials set (%zu) does not match the number of patches (%u). Please set the " - "material for each patch or use a single material for all patches.", - materials.size(), nPatches); - } - - return nPatches; -} - -// Manually set patch IDs for each triangle -void DEMMesh::SetPatchIDs(const std::vector& patch_ids) { - assertTriLength(patch_ids.size(), "SetPatchIDs"); - - // Use rank-transformed patch IDs to ensure they are contiguous and start from 0 - auto [compressed_ids, changed] = rank_transform(patch_ids); - - if (changed) { - DEME_WARNING( - std::string("Patch IDs you supplied for a mesh were not contiguous or did not start from 0.\nThey have " - "been transformed to be contiguous and start from 0.")); - } - - // Copy the patch IDs - m_patch_ids = compressed_ids; - - // Calculate the number of patches (maximum patch ID + 1) - if (!compressed_ids.empty()) { - int max_patch_id = *std::max_element(compressed_ids.begin(), compressed_ids.end()); - nPatches = max_patch_id + 1; - } else { - nPatches = 1; - } - - patches_explicitly_set = true; - - // If material is set and we can broadcast it to all patches, we do so - if (isMaterialSet && materials.size() == 1) { - materials = 
std::vector>(nPatches, materials[0]); - } - // If material is set and we cannot broadcast it to all patches, we raise error - if (isMaterialSet && materials.size() != nPatches) { - DEME_ERROR( - "The number of materials set (%zu) does not match the number of patches (%u). Please set the " - "material for each patch or use a single material for all patches.", - materials.size(), nPatches); - } -} // Compute patch locations (relative to CoM, which is implicitly at 0,0,0) // If not explicitly set, calculates as: @@ -1736,26 +938,6 @@ void DEMMesh::ComputeMassProperties(double& volume, float3& center, float3& iner // Section for Watertight test, false if not -struct QuantKey3 { - int64_t x, y, z; - bool operator==(const QuantKey3& o) const noexcept { return x==o.x && y==o.y && z==o.z; } -}; -struct QuantKey3Hash { - size_t operator()(const QuantKey3& k) const noexcept { - size_t h1 = std::hash{}(k.x); - size_t h2 = std::hash{}(k.y); - size_t h3 = std::hash{}(k.z); - size_t h = h1; - h ^= h2 + 0x9e3779b97f4a7c15ULL + (h<<6) + (h>>2); - h ^= h3 + 0x9e3779b97f4a7c15ULL + (h<<6) + (h>>2); - return h; - } -}; - -static inline int64_t q(double v, double eps) { - return (int64_t)std::llround(v / eps); -} - bool DEMMesh::IsWatertight(size_t* boundary_edges, size_t* nonmanifold_edges) const { if (boundary_edges) *boundary_edges = 0; if (nonmanifold_edges) *nonmanifold_edges = 0; @@ -1803,37 +985,8 @@ bool DEMMesh::IsWatertight(size_t* boundary_edges, size_t* nonmanifold_edges) co return false; } - double minx = m_vertices[0].x, miny = m_vertices[0].y, minz = m_vertices[0].z; - double maxx = minx, maxy = miny, maxz = minz; - for (const auto& v : m_vertices) { - minx = std::min(minx, (double)v.x); miny = std::min(miny, (double)v.y); - minz = std::min(minz, (double)v.z); - maxx = std::max(maxx, (double)v.x); maxy = std::max(maxy, (double)v.y); - maxz = std::max(maxz, (double)v.z); - } - const double dx = maxx - minx, dy = maxy - miny, dz = maxz - minz; - const double diag = 
std::sqrt(dx*dx + dy*dy + dz*dz); - const double eps = std::max(diag * 1e-9, 1e-12); - - std::unordered_map rep; - rep.reserve(m_vertices.size()); - - std::vector canon(m_vertices.size(), (size_t)-1); - size_t next_id = 0; - - for (size_t i = 0; i < m_vertices.size(); ++i) { - const auto& v = m_vertices[i]; - QuantKey3 key{ q(v.x, eps), q(v.y, eps), q(v.z, eps) }; - - auto it = rep.find(key); - if (it == rep.end()) { - rep.emplace(key, next_id); - canon[i] = next_id; - next_id++; - } else { - canon[i] = it->second; - } - } + const double eps = computeVertexQuantEps(m_vertices); + const auto canon = buildCanonicalVertexMap(m_vertices, eps); std::map, size_t> edge_counts2; for (const auto& face : m_face_v_indices) { diff --git a/src/DEM/dT.cpp b/src/DEM/dT.cpp index 9ad0a201..82c053e7 100644 --- a/src/DEM/dT.cpp +++ b/src/DEM/dT.cpp @@ -120,6 +120,7 @@ void DEMDynamicThread::packDataPointers() { ownerMeshNeverWinner.bindDevicePointer(&(granData->ownerMeshNeverWinner)); ownerPatchMesh.bindDevicePointer(&(granData->ownerPatchMesh)); triPatchID.bindDevicePointer(&(granData->triPatchID)); + triNeighborIndex.bindDevicePointer(&(granData->triNeighborIndex)); triNeighbor1.bindDevicePointer(&(granData->triNeighbor1)); triNeighbor2.bindDevicePointer(&(granData->triNeighbor2)); triNeighbor3.bindDevicePointer(&(granData->triNeighbor3)); @@ -284,6 +285,7 @@ void DEMDynamicThread::migrateDataToDevice() { ownerMeshNeverWinner.toDeviceAsync(streamInfo.stream); ownerPatchMesh.toDeviceAsync(streamInfo.stream); triPatchID.toDeviceAsync(streamInfo.stream); + triNeighborIndex.toDeviceAsync(streamInfo.stream); triNeighbor1.toDeviceAsync(streamInfo.stream); triNeighbor2.toDeviceAsync(streamInfo.stream); triNeighbor3.toDeviceAsync(streamInfo.stream); @@ -562,6 +564,7 @@ void DEMDynamicThread::allocateGPUArrays(size_t nOwnerBodies, size_t nTriMeshes, size_t nSpheresGM, size_t nTriGM, + size_t nTriNeighbors, size_t nMeshPatches, unsigned int nAnalGM, size_t nExtraContacts, @@ -645,9 +648,10 
@@ void DEMDynamicThread::allocateGPUArrays(size_t nOwnerBodies, DEME_DUAL_ARRAY_RESIZE(relPosNode2, nTriGM, make_float3(0)); DEME_DUAL_ARRAY_RESIZE(relPosNode3, nTriGM, make_float3(0)); DEME_DUAL_ARRAY_RESIZE(triPatchID, nTriGM, 0); - DEME_DUAL_ARRAY_RESIZE(triNeighbor1, nTriGM, NULL_BODYID); - DEME_DUAL_ARRAY_RESIZE(triNeighbor2, nTriGM, NULL_BODYID); - DEME_DUAL_ARRAY_RESIZE(triNeighbor3, nTriGM, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighborIndex, nTriGM, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighbor1, nTriNeighbors, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighbor2, nTriNeighbors, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighbor3, nTriNeighbors, NULL_BODYID); // Resize to the number of mesh patches DEME_DUAL_ARRAY_RESIZE(ownerPatchMesh, nMeshPatches, 0); @@ -835,7 +839,8 @@ void DEMDynamicThread::populateEntityArrays(const std::vector triPatchID = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); - // Triangle edge neighbors (global triangle indices; NULL_BODYID for boundary) + // Triangle edge neighbors (compact; index via triNeighborIndex) + DualArray triNeighborIndex = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray triNeighbor1 = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray triNeighbor2 = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray triNeighbor3 = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); @@ -735,6 +736,7 @@ class DEMDynamicThread { size_t nTriMeshes, size_t nSpheresGM, size_t nTriGM, + size_t nTriNeighbors, size_t nMeshPatches, unsigned int nAnalGM, size_t nExtraContacts, @@ -775,13 +777,14 @@ class DEMDynamicThread { const std::vector& ext_obj_mass_types, const std::vector& ext_obj_moi_types, const std::vector& ext_obj_comp_num, - const std::vector& mesh_obj_mass_types, - const std::vector& mesh_obj_moi_types, - const std::vector& mesh_obj_mass_offsets, - size_t nExistOwners, - size_t nExistSpheres, - size_t nExistingFacets, - size_t 
nExistingPatches); + const std::vector& mesh_obj_mass_types, + const std::vector& mesh_obj_moi_types, + const std::vector& mesh_obj_mass_offsets, + size_t nExistOwners, + size_t nExistSpheres, + size_t nExistingFacets, + size_t nExistingPatches, + size_t nExistingTriNeighbors); void registerPolicies(const std::unordered_map& template_number_name_map, const ClumpTemplateFlatten& clump_templates, const std::vector& ext_obj_mass_types, @@ -864,6 +867,7 @@ class DEMDynamicThread { size_t nExistingSpheres, size_t nExistingTriMesh, size_t nExistingFacets, + size_t nExistingTriNeighbors, size_t nExistingPatches, unsigned int nExistingObj, unsigned int nExistingAnalGM); diff --git a/src/DEM/kT.cpp b/src/DEM/kT.cpp index e7c5eb03..093eb326 100644 --- a/src/DEM/kT.cpp +++ b/src/DEM/kT.cpp @@ -731,6 +731,7 @@ void DEMKinematicThread::packDataPointers() { ownerMeshConvex.bindDevicePointer(&(granData->ownerMeshConvex)); ownerMeshNeverWinner.bindDevicePointer(&(granData->ownerMeshNeverWinner)); triPatchID.bindDevicePointer(&(granData->triPatchID)); + triNeighborIndex.bindDevicePointer(&(granData->triNeighborIndex)); triNeighbor1.bindDevicePointer(&(granData->triNeighbor1)); triNeighbor2.bindDevicePointer(&(granData->triNeighbor2)); triNeighbor3.bindDevicePointer(&(granData->triNeighbor3)); @@ -785,6 +786,7 @@ void DEMKinematicThread::migrateDataToDevice() { ownerMeshConvex.toDeviceAsync(streamInfo.stream); ownerMeshNeverWinner.toDeviceAsync(streamInfo.stream); triPatchID.toDeviceAsync(streamInfo.stream); + triNeighborIndex.toDeviceAsync(streamInfo.stream); triNeighbor1.toDeviceAsync(streamInfo.stream); triNeighbor2.toDeviceAsync(streamInfo.stream); triNeighbor3.toDeviceAsync(streamInfo.stream); @@ -890,6 +892,7 @@ void DEMKinematicThread::allocateGPUArrays(size_t nOwnerBodies, size_t nTriMeshes, size_t nSpheresGM, size_t nTriGM, + size_t nTriNeighbors, unsigned int nAnalGM, size_t nExtraContacts, unsigned int nMassProperties, @@ -975,9 +978,10 @@ void 
DEMKinematicThread::allocateGPUArrays(size_t nOwnerBodies, // Resize to the number of triangle facets DEME_DUAL_ARRAY_RESIZE(ownerTriMesh, nTriGM, 0); DEME_DUAL_ARRAY_RESIZE(triPatchID, nTriGM, 0); - DEME_DUAL_ARRAY_RESIZE(triNeighbor1, nTriGM, NULL_BODYID); - DEME_DUAL_ARRAY_RESIZE(triNeighbor2, nTriGM, NULL_BODYID); - DEME_DUAL_ARRAY_RESIZE(triNeighbor3, nTriGM, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighborIndex, nTriGM, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighbor1, nTriNeighbors, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighbor2, nTriNeighbors, NULL_BODYID); + DEME_DUAL_ARRAY_RESIZE(triNeighbor3, nTriNeighbors, NULL_BODYID); DEME_DUAL_ARRAY_RESIZE(relPosNode1, nTriGM, make_float3(0)); DEME_DUAL_ARRAY_RESIZE(relPosNode2, nTriGM, make_float3(0)); DEME_DUAL_ARRAY_RESIZE(relPosNode3, nTriGM, make_float3(0)); @@ -1055,7 +1059,8 @@ void DEMKinematicThread::populateEntityArrays(const std::vector prescans_comp; @@ -1157,22 +1162,32 @@ void DEMKinematicThread::populateEntityArrays(const std::vector>& input_clump_batches, @@ -1229,6 +1244,7 @@ void DEMKinematicThread::updateClumpMeshArrays(const std::vector& dT_data, size_t nContacts) { diff --git a/src/DEM/kT.h b/src/DEM/kT.h index 5620b030..e7fb6492 100644 --- a/src/DEM/kT.h +++ b/src/DEM/kT.h @@ -204,7 +204,8 @@ class DEMKinematicThread { // Mesh patch information: each facet belongs to a patch // Patch ID for each triangle facet (maps facet to patch) DualArray triPatchID = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); - // Triangle edge neighbors (global triangle indices; NULL_BODYID for boundary) + // Triangle edge neighbors (compact; index via triNeighborIndex) + DualArray triNeighborIndex = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray triNeighbor1 = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray triNeighbor2 = DualArray(&m_approxHostBytesUsed, &m_approxDeviceBytesUsed); DualArray triNeighbor3 = DualArray(&m_approxHostBytesUsed, 
&m_approxDeviceBytesUsed); @@ -340,6 +341,7 @@ class DEMKinematicThread { size_t nTriMeshes, size_t nSpheresGM, size_t nTriGM, + size_t nTriNeighbors, unsigned int nAnalGM, size_t nExtraContacts, unsigned int nMassProperties, @@ -366,7 +368,8 @@ class DEMKinematicThread { size_t nExistOwners, size_t nExistSpheres, size_t nExistingFacets, - size_t nExistingMeshPatches); + size_t nExistingMeshPatches, + size_t nExistingTriNeighbors); /// Initialize arrays void initGPUArrays(const std::vector>& input_clump_batches, @@ -405,6 +408,7 @@ class DEMKinematicThread { size_t nExistingSpheres, size_t nExistingTriMesh, size_t nExistingFacets, + size_t nExistingTriNeighbors, size_t nExistingPatches, unsigned int nExistingObj, unsigned int nExistingAnalGM); diff --git a/src/DEM/utils/HostSideHelpers.hpp b/src/DEM/utils/HostSideHelpers.hpp index 34f0cd4a..789f9346 100644 --- a/src/DEM/utils/HostSideHelpers.hpp +++ b/src/DEM/utils/HostSideHelpers.hpp @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include "../kernel/DEMHelperKernels.cuh" #include "../VariableTypes.h" @@ -30,6 +32,77 @@ namespace deme { +namespace detail { + +struct QuantKey3 { + int64_t x, y, z; + bool operator==(const QuantKey3& o) const noexcept { return x == o.x && y == o.y && z == o.z; } +}; + +struct QuantKey3Hash { + size_t operator()(const QuantKey3& k) const noexcept { + size_t h1 = std::hash{}(k.x); + size_t h2 = std::hash{}(k.y); + size_t h3 = std::hash{}(k.z); + size_t h = h1; + h ^= h2 + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + h ^= h3 + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2); + return h; + } +}; + +inline int64_t quantize_coord(double v, double eps) { + return static_cast(std::llround(v / eps)); +} + +} // namespace detail + +inline double computeVertexQuantEps(const std::vector& vertices) { + if (vertices.empty()) { + return 0.0; + } + double minx = vertices[0].x, miny = vertices[0].y, minz = vertices[0].z; + double maxx = minx, maxy = miny, maxz = minz; + for (const 
auto& v : vertices) { + minx = std::min(minx, (double)v.x); + miny = std::min(miny, (double)v.y); + minz = std::min(minz, (double)v.z); + maxx = std::max(maxx, (double)v.x); + maxy = std::max(maxy, (double)v.y); + maxz = std::max(maxz, (double)v.z); + } + const double dx = maxx - minx, dy = maxy - miny, dz = maxz - minz; + const double diag = std::sqrt(dx * dx + dy * dy + dz * dz); + return std::max(diag * 1e-9, 1e-12); +} + +inline std::vector buildCanonicalVertexMap(const std::vector& vertices, double eps) { + if (vertices.empty()) { + return {}; + } + if (eps <= 0.0) { + eps = 1e-12; + } + std::unordered_map rep; + rep.reserve(vertices.size()); + std::vector canon(vertices.size(), static_cast(-1)); + size_t next_id = 0; + for (size_t i = 0; i < vertices.size(); ++i) { + const auto& v = vertices[i]; + detail::QuantKey3 key{detail::quantize_coord(v.x, eps), detail::quantize_coord(v.y, eps), + detail::quantize_coord(v.z, eps)}; + auto it = rep.find(key); + if (it == rep.end()) { + rep.emplace(key, next_id); + canon[i] = next_id; + next_id++; + } else { + canon[i] = it->second; + } + } + return canon; +} + // Generic helper to access tuple of pointers template auto dereference_at(const Tuple& ptrs, size_t idx, std::index_sequence) { diff --git a/src/algorithms/DEMContactDetection.cu b/src/algorithms/DEMContactDetection.cu index eee8d0a9..447c30e4 100644 --- a/src/algorithms/DEMContactDetection.cu +++ b/src/algorithms/DEMContactDetection.cu @@ -1622,6 +1622,7 @@ void contactDetection(std::shared_ptr& bin_sphere_kern propagateActiveTriLabels<<>>(activeTriKeysUnique, labelsIn, labelsOut, groupActiveStart, groupActiveCount, + granData->triNeighborIndex, granData->triNeighbor1, granData->triNeighbor2, granData->triNeighbor3, numUniqueActiveTri); bodyID_t* tmp = labelsIn; diff --git a/src/algorithms/DEMContactDetectionKernels.cuh b/src/algorithms/DEMContactDetectionKernels.cuh index aa52a54e..ed6a6df9 100644 --- a/src/algorithms/DEMContactDetectionKernels.cuh +++ 
b/src/algorithms/DEMContactDetectionKernels.cuh @@ -322,7 +322,9 @@ __global__ void computeGroupWinners(const contact_t* groupTypes, forceSingleIsland[myID] = single_island ? 1 : 0; notStupidBool_t pickA = 0; - if (A_never && !B_never) { + if (A_never && B_never) { + pickA = 0; // deterministic: prefer B when both are never-winner + } else if (A_never && !B_never) { pickA = 0; } else if (B_never && !A_never) { pickA = 1; @@ -421,6 +423,7 @@ __global__ void propagateActiveTriLabels(const uint64_t* keys, bodyID_t* labelsOut, const contactPairs_t* groupStart, const contactPairs_t* groupCount, + const bodyID_t* triNeighborIndex, const bodyID_t* triNeighbor1, const bodyID_t* triNeighbor2, const bodyID_t* triNeighbor3, @@ -434,7 +437,12 @@ __global__ void propagateActiveTriLabels(const uint64_t* keys, const contactPairs_t count = groupCount[grp]; bodyID_t label = labelsIn[myID]; - bodyID_t nbs[3] = {triNeighbor1[triID], triNeighbor2[triID], triNeighbor3[triID]}; + const bodyID_t nb_idx = triNeighborIndex[triID]; + if (nb_idx == NULL_BODYID) { + labelsOut[myID] = label; + return; + } + bodyID_t nbs[3] = {triNeighbor1[nb_idx], triNeighbor2[nb_idx], triNeighbor3[nb_idx]}; for (int e = 0; e < 3; ++e) { const bodyID_t nb = nbs[e]; if (nb == NULL_BODYID || count == 0) { diff --git a/src/demo/DEMdemo_ResponseAngleMesh.cpp b/src/demo/DEMdemo_ResponseAngleMesh.cpp index 85370fd9..f0489ed1 100644 --- a/src/demo/DEMdemo_ResponseAngleMesh.cpp +++ b/src/demo/DEMdemo_ResponseAngleMesh.cpp @@ -78,7 +78,7 @@ int main() { DEMSolver DEMSim; DEMSim.SetOutputFormat(OUTPUT_FORMAT::CSV); DEMSim.SetOutputContent(OUTPUT_CONTENT::FAMILY); - DEMSim.SetMeshOutputFormat("STL"); + DEMSim.SetMeshOutputFormat("VTK"); DEMSim.SetNoForceRecord(); DEMSim.SetMeshUniversalContact(true); const float mm_to_m = 0.001f; @@ -109,6 +109,8 @@ int main() { float3 tri_center = make_float3(0, 0, 0); float3 tri_inertia = make_float3(0, 0, 0); tri_template->ComputeMassProperties(tri_volume, tri_center, tri_inertia); + 
// tri_template->SetConvex(true); // for convex particels only + // tri_template->SetNeverWinner(true); // if mesh is more coarse the other contacts const float particle_mass = static_cast(tri_volume * particle_density); const float3 particle_moi = tri_inertia * particle_density; std::cout << "Particle volume (m^3): " << tri_volume << ", mass (kg): "<< particle_mass << std::endl; @@ -218,7 +220,7 @@ int main() { std::cout << "Frame: " << currframe << std::endl; DEMSim.ShowThreadCollaborationStats(); char filename[100]; - sprintf(filename, "DEMdemo_output_%04d.stl", currframe); + sprintf(filename, "DEMdemo_output_%04d.vtk", currframe); DEMSim.WriteMeshFile(out_dir / filename); currframe++; max_v = max_v_finder->GetValue(); diff --git a/src/demo/ModularTests/CMakeLists.txt b/src/demo/ModularTests/CMakeLists.txt index 8d10e1fd..05562886 100644 --- a/src/demo/ModularTests/CMakeLists.txt +++ b/src/demo/ModularTests/CMakeLists.txt @@ -13,7 +13,6 @@ SET(LIBRARIES SET(MODULAR_TESTS DEMTest_MeshTemplate DEMTest_PatchLocations - DEMTest_MeshPatch DEMTest_SimpleCollisions ) diff --git a/src/demo/ModularTests/DEMTest_MeshPatch.cpp b/src/demo/ModularTests/DEMTest_MeshPatch.cpp deleted file mode 100644 index 06c9b5e2..00000000 --- a/src/demo/ModularTests/DEMTest_MeshPatch.cpp +++ /dev/null @@ -1,271 +0,0 @@ -// Copyright (c) 2021, SBEL GPU Development Team -// Copyright (c) 2021, University of Wisconsin - Madison -// -// SPDX-License-Identifier: BSD-3-Clause - -// ============================================================================= -// A demo that tests mesh patch splitting functionality. -// This demo loads a mesh and splits it into convex patches based on angle -// thresholds, demonstrating the mesh patch splitting utility. 
-// ============================================================================= - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -using namespace deme; -using namespace std::filesystem; - -int main() { - std::cout << "========================================" << std::endl; - std::cout << "DEM Mesh Patch Splitting Demo" << std::endl; - std::cout << "========================================" << std::endl; - - // Test with a simple cube mesh - std::cout << "\n--- Test 1: Cube Mesh with Default Patch Info ---" << std::endl; - auto cube_mesh = std::make_shared(); - bool loaded = cube_mesh->LoadWavefrontMesh((GET_DATA_PATH() / "mesh/cube.obj").string()); - - if (loaded) { - std::cout << "Loaded cube mesh successfully" << std::endl; - std::cout << "Number of triangles: " << cube_mesh->GetNumTriangles() << std::endl; - std::cout << "Number of vertices: " << cube_mesh->GetNumNodes() << std::endl; - - // Test default patch info (should be all in patch 0) - std::cout << "\nDefault patch info (assuming convex mesh):" << std::endl; - std::cout << "Number of patches: " << cube_mesh->GetNumPatches() << std::endl; - std::cout << "Patches explicitly set: " << (cube_mesh->ArePatchesExplicitlySet() ? "yes" : "no") << std::endl; - const auto& default_patch_ids = cube_mesh->GetPatchIDs(); - std::cout << "All triangles in patch 0: " - << (std::all_of(default_patch_ids.begin(), default_patch_ids.end(), [](int id) { return id == 0; }) - ? 
"yes" - : "no") - << std::endl; - - // Test different angle thresholds - std::cout << "\n--- Test 2: Automatic Patch Splitting ---" << std::endl; - float thresholds[] = {10.0f, 45.0f, 90.0f, 300.0f}; - - for (float threshold : thresholds) { - size_t num_patches = cube_mesh->SplitIntoConvexPatches(threshold); - std::cout << "\nAngle threshold: " << std::fixed << std::setprecision(1) << threshold << " degrees" - << std::endl; - std::cout << "Number of patches: " << num_patches << std::endl; - std::cout << "Patches explicitly set: " << (cube_mesh->ArePatchesExplicitlySet() ? "yes" : "no") - << std::endl; - - // Show patch distribution - const auto& patch_ids = cube_mesh->GetPatchIDs(); - - // Count triangles per patch - std::map patch_counts; - for (int patch_id : patch_ids) { - patch_counts[patch_id]++; - } - - std::cout << "Patch distribution:" << std::endl; - for (const auto& entry : patch_counts) { - std::cout << " Patch " << entry.first << ": " << entry.second << " triangles" << std::endl; - } - } - - // Optimized patch settings for convex-focused splitting (prefer single patch) - std::cout << "\n--- Test 2b: Optimized Convex Patch Splitting (Cube) ---" << std::endl; - DEMMesh::PatchSplitOptions opt; - opt.soft_angle_deg = -1.0f; - opt.patch_normal_max_deg = -1.0f; - opt.block_concave_edges = true; - opt.concave_allow_deg = 0.0f; - opt.patch_min = 1; - opt.patch_max = std::numeric_limits::max(); - opt.seed_largest_first = true; - opt.auto_tune.enabled = false; - - DEMMesh::PatchQualityReport rep_cube; - size_t num_patches_opt = cube_mesh->SplitIntoConvexPatches(120.0f, opt, &rep_cube); - std::cout << "Optimized patches: " << num_patches_opt << " (quality " - << static_cast(rep_cube.overall) << ")" << std::endl; - - // Test manual patch ID setting - std::cout << "\n--- Test 3: Manual Patch ID Setting ---" << std::endl; - size_t num_tris = cube_mesh->GetNumTriangles(); - std::vector manual_patches(num_tris); - // Split triangles into 3 patches based on index - for 
(size_t i = 0; i < num_tris; ++i) { - manual_patches[i] = i % 3; // Assign patches 0, 1, 2 cyclically - } - - cube_mesh->SetPatchIDs(manual_patches); - std::cout << "Manually set patch IDs (cycling 0, 1, 2)" << std::endl; - std::cout << "Number of patches: " << cube_mesh->GetNumPatches() << std::endl; - std::cout << "Patches explicitly set: " << (cube_mesh->ArePatchesExplicitlySet() ? "yes" : "no") << std::endl; - - // Count triangles per patch - const auto& manual_patch_ids = cube_mesh->GetPatchIDs(); - std::map manual_patch_counts; - for (int patch_id : manual_patch_ids) { - manual_patch_counts[patch_id]++; - } - std::cout << "Manual patch distribution:" << std::endl; - for (const auto& entry : manual_patch_counts) { - std::cout << " Patch " << entry.first << ": " << entry.second << " triangles" << std::endl; - } - } else { - std::cout << "Failed to load cube mesh" << std::endl; - } - - // Test with sphere mesh if available - std::cout << "\n--- Test 4: Sphere Mesh ---" << std::endl; - auto sphere_mesh = std::make_shared(); - loaded = sphere_mesh->LoadWavefrontMesh((GET_DATA_PATH() / "mesh/sphere.obj").string()); - - if (loaded) { - std::cout << "Loaded sphere mesh successfully" << std::endl; - std::cout << "Number of triangles: " << sphere_mesh->GetNumTriangles() << std::endl; - std::cout << "Number of vertices: " << sphere_mesh->GetNumNodes() << std::endl; - - // Optimized patch split (prefer single patch) - DEMMesh::PatchSplitOptions opt; - opt.soft_angle_deg = -1.0f; - opt.patch_normal_max_deg = -1.0f; - opt.block_concave_edges = true; - opt.concave_allow_deg = 0.0f; - opt.patch_min = 1; - opt.patch_max = std::numeric_limits::max(); - opt.seed_largest_first = true; - opt.auto_tune.enabled = false; - - DEMMesh::PatchQualityReport rep_sphere; - size_t num_patches = sphere_mesh->SplitIntoConvexPatches(120.0f, opt, &rep_sphere); - std::cout << "Split into " << num_patches << " patches (optimized, quality " - << static_cast(rep_sphere.overall) << ")" << std::endl; 
- - if (sphere_mesh->ArePatchesExplicitlySet()) { - const auto& patch_ids = sphere_mesh->GetPatchIDs(); - - // Count triangles per patch - std::map patch_counts; - for (int patch_id : patch_ids) { - patch_counts[patch_id]++; - } - - std::cout << "Number of patches with different sizes:" << std::endl; - std::map size_distribution; - for (const auto& entry : patch_counts) { - size_distribution[entry.second]++; - } - for (const auto& entry : size_distribution) { - std::cout << " " << entry.second << " patches with " << entry.first << " triangles each" << std::endl; - } - } - } else { - std::cout << "Sphere mesh not available, skipping" << std::endl; - } - - // Test edge case: empty mesh - std::cout << "\n--- Test 5: Empty Mesh ---" << std::endl; - auto empty_mesh = std::make_shared(); - std::cout << "Empty mesh default patches: " << empty_mesh->GetNumPatches() << " (expected: 1)" << std::endl; - std::cout << "Patches explicitly set: " << (empty_mesh->ArePatchesExplicitlySet() ? "yes" : "no") - << " (expected: no)" << std::endl; - - // Test concave mesh (drum) - std::cout << "\n--- Test 6: Concave Drum Mesh (STL) ---" << std::endl; - auto drum_mesh = std::make_shared(); - loaded = drum_mesh->LoadSTLMesh((GET_DATA_PATH() / "mesh/drum.stl").string()); - if (loaded) { - std::cout << "Loaded drum mesh successfully" << std::endl; - std::cout << "Number of triangles: " << drum_mesh->GetNumTriangles() << std::endl; - std::cout << "Number of vertices: " << drum_mesh->GetNumNodes() << std::endl; - - DEMMesh::PatchSplitOptions opt; - opt.soft_angle_deg = -1.0f; - opt.patch_normal_max_deg = -1.0f; - opt.block_concave_edges = true; - opt.concave_allow_deg = 0.0f; - opt.patch_min = 1; - opt.patch_max = std::numeric_limits::max(); - opt.seed_largest_first = true; - opt.auto_tune.enabled = false; - - DEMMesh::PatchQualityReport rep_drum; - size_t num_patches = drum_mesh->SplitIntoConvexPatches(120.0f, opt, &rep_drum); - std::cout << "Split into " << num_patches << " patches (concave, 
quality " - << static_cast(rep_drum.overall) << ")" << std::endl; - } else { - std::cout << "Drum mesh not available, skipping" << std::endl; - } - - // Test PLY export with per-patch colors (debug view) - std::cout << "\n--- Test 7: PLY Export with Patch Colors (per mesh) ---" << std::endl; - { - path out_dir = current_path(); - out_dir /= "DemoOutput_MeshPatch"; - create_directory(out_dir); - - auto export_mesh = [&](const std::string& label, const path& mesh_path, bool is_stl) { - DEMSolver DEMSim; - DEMSim.SetVerbosity("INFO"); - DEMSim.SetMeshOutputFormat("PLY"); - DEMSim.EnableMeshPatchColorOutput(true); - DEMSim.InstructBoxDomainDimension(10, 10, 10); - DEMSim.SetMeshUniversalContact(true); - - auto mat_type = DEMSim.LoadMaterial({{"E", 1e9}, {"nu", 0.3}, {"CoR", 0.6}, {"mu", 0.5}}); - - std::shared_ptr mesh_template; - if (is_stl) { - mesh_template = DEMSim.LoadMeshType(mesh_path.string(), mat_type, true, false); - } else { - mesh_template = DEMSim.LoadMeshType(mesh_path.string(), mat_type, true, false); - } - - if (!mesh_template) { - std::cout << "Failed to load mesh template for " << label << std::endl; - return; - } - - DEMMesh::PatchSplitOptions opt; - opt.soft_angle_deg = -1.0f; - opt.patch_normal_max_deg = -1.0f; - opt.block_concave_edges = true; - opt.concave_allow_deg = 0.0f; - opt.patch_min = 1; - opt.patch_max = std::numeric_limits::max(); - opt.seed_largest_first = true; - opt.auto_tune.enabled = false; - - mesh_template->SplitIntoConvexPatches(120.0f, opt); - mesh_template->SetMaterial(mat_type); - - auto mesh_instance = DEMSim.AddMeshFromTemplate(mesh_template, make_float3(0, 0, 0)); - mesh_instance->SetFamily(0); - mesh_instance->SetMass(1000.); - mesh_instance->SetMOI(make_float3(200., 200., 200.)); - - DEMSim.Initialize(); - - path ply_file = out_dir / ("mesh_patch_colors_" + label + ".ply"); - DEMSim.WriteMeshFile(ply_file); - DEMSim.WaitForPendingOutput(); - std::cout << "Wrote patch-colored PLY to: " << ply_file << std::endl; - }; - - 
export_mesh("cube", GET_DATA_PATH() / "mesh/cube.obj", false); - export_mesh("sphere", GET_DATA_PATH() / "mesh/sphere.obj", false); - export_mesh("drum", GET_DATA_PATH() / "mesh/drum.stl", true); - } - - std::cout << "\n========================================" << std::endl; - std::cout << "Demo completed successfully!" << std::endl; - std::cout << "========================================" << std::endl; - - return 0; -} diff --git a/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp b/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp index 4722c916..17798bb4 100644 --- a/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp +++ b/src/demo/ModularTests/DEMTest_SimpleCollisions.cpp @@ -31,23 +31,38 @@ using namespace deme; namespace { -constexpr bool kUseTriangleParticles = true; // toggle to run the STL-based triangle setup +constexpr bool kUseTriangleParticles = false; // toggle to run the STL-based triangle setup constexpr float kMmToMeters = 0.001f; constexpr double kTriangleParticleDensity = 2600.0; constexpr int kNumRuns = 10; constexpr double kGap = 0.005; // 0.5 mm -constexpr double kSpeed = 1.0; // 1 m/s -constexpr double kTimeStep = 1e-5; // seconds -constexpr int kMaxSteps = 100000; // 1 seconds max -constexpr double kContactEps = 1e-6; // contact force threshold +constexpr double kSpeed = 1.0; // 1 m/s magnitude +constexpr double kTimeStep = 1e-5; +constexpr int kMaxSteps = 50000; // oberserve 0.5s fitting with --> kTimeStep +constexpr double kContactEps = 1e-6; + +// NEW: impact angle controls +constexpr double kImpactThetaDeg = 0.0; // 0 = vertical down, 90 = pure lateral +constexpr double kImpactPhiDeg = 0.0; // azimuth in XY plane: 0 -> +X, 90 -> +Y + +// NEW: multi-impact tracking +constexpr int kMaxImpactsToRecord = 8; + double vmax = kSpeed; +struct ImpactEvent { + bool has_rebound = false; // rebound captured at end of this contact episode + double peak_normal_force = 0.0; // peak Fn during this episode + double rebound_speed = 0.0; // |v| right 
after separation (if has_rebound) + float3 rebound_dir = make_float3(0,0,0); + int start_step = -1; + int end_step = -1; +}; + struct RunResult { bool ok = false; - double rebound_speed = 0.0; - double peak_normal_force = 0.0; - float3 rebound_dir = make_float3(0, 0, 0); + std::vector impacts; // NEW: can contain multiple episodes }; struct Stats { @@ -71,9 +86,7 @@ float3 vec_scale(const float3& v, double s) { Stats calc_stats(const std::vector& values) { Stats s; - if (values.empty()) { - return s; - } + if (values.empty()) return s; s.min = values.front(); s.max = values.front(); double sum = 0.0; @@ -102,47 +115,40 @@ double compute_min_z_rotated(const std::shared_ptr& mesh, const float4& return min_z; } -void assign_patch_ids(const std::shared_ptr& mesh_template, - bool per_triangle_patches, - const std::shared_ptr& mat_type) { - if (!mesh_template) { - return; - } - const size_t num_tris = mesh_template->GetNumTriangles(); - std::vector patch_ids(num_tris, 0); - if (per_triangle_patches) { - for (size_t i = 0; i < num_tris; ++i) { - patch_ids[i] = static_cast(i); - } - } - mesh_template->SetPatchIDs(patch_ids); - mesh_template->SetMaterial(mat_type); +// NEW: build initial velocity vector from speed + angles (theta from +normal, phi azimuth in plane) +float3 build_velocity(double speed, double theta_deg, double phi_deg) { + const double theta = theta_deg * PI / 180.0; + const double phi = phi_deg * PI / 180.0; + + // normal component (downwards for approaching) + const double v_n = -speed * std::cos(theta); + // tangential magnitude + const double v_t = speed * std::sin(theta); + + const double vx = v_t * std::cos(phi); + const double vy = v_t * std::sin(phi); + const double vz = v_n; + + return make_float3((float)vx, (float)vy, (float)vz); } std::shared_ptr load_cube_template(DEMSolver& DEMSim, - const std::shared_ptr& mat_type, - bool per_triangle_patches) { + const std::shared_ptr& mat_type) { auto mesh_template = DEMSim.LoadMeshType((GET_DATA_PATH() / 
"mesh/cube.obj").string(), mat_type, - true, // load_normals - false); // load_uv - if (!mesh_template) { - return nullptr; - } - - assign_patch_ids(mesh_template, per_triangle_patches, mat_type); + true, false); + if (!mesh_template) return nullptr; + mesh_template->SetMaterial(mat_type); return mesh_template; } std::shared_ptr load_triangle_template(DEMSolver& DEMSim, - const std::shared_ptr& mat_type, - bool per_triangle_patches, - float& out_mass, - float3& out_moi) { + const std::shared_ptr& mat_type, + float& out_mass, + float3& out_moi) { std::shared_ptr mesh_template = DEMSim.LoadMeshType((GET_DATA_PATH() / "mesh/simpleTriangleShape4mm.stl").string(), mat_type, true, false); - if (!mesh_template) { - return nullptr; - } + if (!mesh_template) return nullptr; + mesh_template->Scale(kMmToMeters); double volume = 0.0; @@ -153,15 +159,15 @@ std::shared_ptr load_triangle_template(DEMSolver& DEMSim, out_mass = static_cast(volume * kTriangleParticleDensity); out_moi = inertia * static_cast(kTriangleParticleDensity); - assign_patch_ids(mesh_template, per_triangle_patches, mat_type); + mesh_template->SetMaterial(mat_type); return mesh_template; } RunResult run_single_collision(const float4& init_rot, - bool per_triangle_patches, bool use_triangle_particles, const std::string& label, - int run_id) { + int run_id, + const float3& init_vel) { RunResult result; DEMSolver DEMSim; @@ -177,76 +183,101 @@ RunResult run_single_collision(const float4& init_rot, float3 plane_normal = make_float3(0, 0, 1); auto plane = DEMSim.AddBCPlane(make_float3(0, 0, 0), plane_normal, mat_type); auto plane_tracker = DEMSim.Track(plane); - const char* mesh_desc = use_triangle_particles ? 
"triangle mesh" : "cube mesh"; + auto mesh_template = std::shared_ptr{}; float particle_mass = 1.0f; float3 particle_moi = make_float3(1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f); if (use_triangle_particles) { - mesh_template = load_triangle_template(DEMSim, mat_type, per_triangle_patches, particle_mass, particle_moi); + mesh_template = load_triangle_template(DEMSim, mat_type, particle_mass, particle_moi); } else { - mesh_template = load_cube_template(DEMSim, mat_type, per_triangle_patches); + mesh_template = load_cube_template(DEMSim, mat_type); } if (!mesh_template) { - std::cout << "[" << label << "] Run " << run_id << ": failed to load " << mesh_desc << std::endl; + std::cout << "[" << label << "] Run " << run_id << ": failed to load mesh template" << std::endl; return result; } + double min_z = compute_min_z_rotated(mesh_template, init_rot); double init_z = kGap - min_z; - auto cube = DEMSim.AddMeshFromTemplate(mesh_template, make_float3(0, 0, 0)); - cube->SetFamily(0); - cube->SetMass(particle_mass); - cube->SetMOI(particle_moi); - cube->SetInitQuat(init_rot); - cube->SetInitPos(make_float3(0, 0, static_cast(init_z))); - auto cube_tracker = DEMSim.Track(cube); + auto body = DEMSim.AddMeshFromTemplate(mesh_template, make_float3(0, 0, 0)); + body->SetFamily(0); + body->SetMass(particle_mass); + body->SetMOI(particle_moi); + body->SetInitQuat(init_rot); + body->SetInitPos(make_float3(0, 0, static_cast(init_z))); + auto body_tracker = DEMSim.Track(body); DEMSim.SetInitTimeStep(kTimeStep); DEMSim.Initialize(); - cube_tracker->SetVel(make_float3(0, 0, -static_cast(kSpeed))); - bool contact_started = false; - bool rebound_captured = false; - double peak_normal_force = 0.0; + // NEW: angled initial velocity + body_tracker->SetVel(init_vel); + + bool in_contact = false; + ImpactEvent current{}; + int impacts_recorded = 0; for (int step = 0; step < kMaxSteps; ++step) { DEMSim.DoStepDynamics(); + // NOTE: this is your current way to estimate contact force on the plane float3 
plane_force = plane_tracker->ContactAcc(); plane_force = vec_scale(plane_force, plane_tracker->Mass()); double normal_force = std::abs(vec_dot(plane_force, plane_normal)); - peak_normal_force = std::max(peak_normal_force, normal_force); - if (normal_force > kContactEps) { - contact_started = true; + // start of a new contact episode + if (!in_contact && normal_force > kContactEps) { + in_contact = true; + current = ImpactEvent{}; + current.start_step = step; + current.peak_normal_force = normal_force; + } + + // update peak during contact + if (in_contact) { + current.peak_normal_force = std::max(current.peak_normal_force, normal_force); } - float3 vel = cube_tracker->Vel(); - double vel_n = vec_dot(vel, plane_normal); + // end of contact episode + if (in_contact && normal_force <= kContactEps) { + in_contact = false; + current.end_step = step; + + // capture rebound info if moving away (positive normal velocity) + float3 vel = body_tracker->Vel(); + double vel_n = vec_dot(vel, plane_normal); + + if (vel_n > 0.0) { + double speed = vec_length(vel); + float3 dir = make_float3(0, 0, 0); + if (speed > 0) { + dir = vec_scale(vel, 1.0 / speed); + } + current.has_rebound = true; + current.rebound_speed = speed; + current.rebound_dir = dir; + } + + result.impacts.push_back(current); + impacts_recorded++; - if (contact_started && normal_force <= kContactEps && vel_n > 0.0) { - double speed = vec_length(vel); - float3 dir = make_float3(0, 0, 0); - if (speed > 0) { - dir = vec_scale(vel, 1.0 / speed); + if (impacts_recorded >= kMaxImpactsToRecord) { + break; } - result.ok = true; - result.rebound_speed = speed; - result.peak_normal_force = peak_normal_force; - result.rebound_dir = dir; - rebound_captured = true; - break; } } - if (!rebound_captured) { - std::cout << "[" << label << "] Run " << run_id << ": rebound not captured within max steps" << std::endl; + result.ok = !result.impacts.empty(); + if (!result.ok) { + std::cout << "[" << label << "] Run " << run_id << ": no 
impacts recorded within max steps" << std::endl; } return result; } +// Updated stats: by default we evaluate the FIRST rebound episode that has_rebound==true void print_stats_block(const std::string& label, const std::vector& results) { std::vector speeds; @@ -254,16 +285,29 @@ void print_stats_block(const std::string& label, std::vector dir_x; std::vector dir_y; std::vector dir_z; + std::vector n_impacts; for (const auto& r : results) { - if (!r.ok) { + if (!r.ok) continue; + + n_impacts.push_back((double)r.impacts.size()); + + // pick first episode with rebound + const ImpactEvent* chosen = nullptr; + for (const auto& ev : r.impacts) { + if (ev.has_rebound) { chosen = &ev; break; } + } + if (!chosen) { + // still record peak of first impact if rebound wasn't detected + forces.push_back(r.impacts.front().peak_normal_force); continue; } - speeds.push_back(r.rebound_speed); - forces.push_back(r.peak_normal_force); - dir_x.push_back(r.rebound_dir.x); - dir_y.push_back(r.rebound_dir.y); - dir_z.push_back(r.rebound_dir.z); + + speeds.push_back(chosen->rebound_speed); + forces.push_back(chosen->peak_normal_force); + dir_x.push_back(chosen->rebound_dir.x); + dir_y.push_back(chosen->rebound_dir.y); + dir_z.push_back(chosen->rebound_dir.z); } Stats s_speed = calc_stats(speeds); @@ -271,8 +315,11 @@ void print_stats_block(const std::string& label, Stats s_dx = calc_stats(dir_x); Stats s_dy = calc_stats(dir_y); Stats s_dz = calc_stats(dir_z); + Stats s_ni = calc_stats(n_impacts); std::cout << "\n=== " << label << " stats (population stddev) ===" << std::endl; + std::cout << "Impacts per run: mean=" << s_ni.mean << " min=" << s_ni.min << " max=" << s_ni.max + << " std=" << s_ni.stddev << std::endl; std::cout << "Rebound speed [m/s]: mean=" << s_speed.mean << " min=" << s_speed.min << " max=" << s_speed.max << " std=" << s_speed.stddev << std::endl; std::cout << "Peak normal force [N]: mean=" << s_force.mean << " min=" << s_force.min << " max=" << s_force.max @@ -285,6 
+332,11 @@ void print_stats_block(const std::string& label, << " std=" << s_dz.stddev << std::endl; } +// Rotations +float4 flat_quat() { + return make_float4(0, 0, 0, 1); // NEW: identity +} + float4 edge_quat() { float4 q = make_float4(0, 0, 0, 1); q = RotateQuat(q, make_float3(1, 0, 0), static_cast(PI / 4.0)); @@ -300,23 +352,35 @@ float4 corner_quat() { void run_scenario(const std::string& label, const float4& rot, - bool per_triangle_patches, - bool use_triangle_particles) { + bool use_triangle_particles, + const float3& init_vel) { std::cout << "\n========================================" << std::endl; std::cout << label << std::endl; std::cout << "========================================" << std::endl; std::cout << "Using mesh: " << (use_triangle_particles ? "simpleTriangleShape4mm.stl" : "cube.obj") << std::endl; + std::cout << "Init vel: (" << init_vel.x << ", " << init_vel.y << ", " << init_vel.z << ")" + << " |v|=" << vec_length(init_vel) << std::endl; std::vector results; results.reserve(kNumRuns); for (int i = 0; i < kNumRuns; ++i) { - RunResult r = run_single_collision(rot, per_triangle_patches, use_triangle_particles, label, i); + RunResult r = run_single_collision(rot, use_triangle_particles, label, i, init_vel); results.push_back(r); + if (r.ok) { - std::cout << "Run " << i << ": speed=" << r.rebound_speed << " dir=(" << r.rebound_dir.x << ", " - << r.rebound_dir.y << ", " << r.rebound_dir.z << ") force=" << r.peak_normal_force - << std::endl; + std::cout << "Run " << i << ": impacts=" << r.impacts.size(); + // print first rebound episode if exists + const ImpactEvent* chosen = nullptr; + for (const auto& ev : r.impacts) { if (ev.has_rebound) { chosen = &ev; break; } } + if (chosen) { + std::cout << " rebound_speed=" << chosen->rebound_speed + << " dir=(" << chosen->rebound_dir.x << ", " << chosen->rebound_dir.y << ", " << chosen->rebound_dir.z << ")" + << " peakFn=" << chosen->peak_normal_force; + } else { + std::cout << " (no rebound captured) 
peakFn_first=" << r.impacts.front().peak_normal_force; + } + std::cout << std::endl; } } @@ -332,13 +396,16 @@ int main() { std::cout << "Particle mesh mode: " << (kUseTriangleParticles ? "simpleTriangleShape4mm.stl" : "cube.obj") << std::endl; - float4 q_edge = edge_quat(); + // NEW: build velocity once (same for all scenarios) + float3 init_vel = build_velocity(kSpeed, kImpactThetaDeg, kImpactPhiDeg); + + float4 q_flat = flat_quat(); + float4 q_edge = edge_quat(); float4 q_corner = corner_quat(); - run_scenario("Edge impact - single patch", q_edge, false, kUseTriangleParticles); - run_scenario("Edge impact - 12 patches", q_edge, true, kUseTriangleParticles); - run_scenario("Corner impact - single patch", q_corner, false, kUseTriangleParticles); - run_scenario("Corner impact - 12 patches", q_corner, true, kUseTriangleParticles); + run_scenario("Flat impact", q_flat, kUseTriangleParticles, init_vel); + run_scenario("Edge impact", q_edge, kUseTriangleParticles, init_vel); + run_scenario("Corner impact", q_corner, kUseTriangleParticles, init_vel); std::cout << "\n========================================" << std::endl; std::cout << "Test completed" << std::endl;