Skip to content

Commit

Permalink
Core: fixed bugs in CUDA backend and improved its error checking.
Browse files Browse the repository at this point in the history
Also removed -g compiler flags from gcc.
- fixed indexing errors in Hamiltonians and removed unneeded atomics
- the managed allocator now contains functions to deal with cuda errors
- cuda error handling functions are now used throughout vectormath and manifoldmath to hopefully catch bugs earlier and give better backtracing
Note: for some reason the exceptions thrown by CudaHandleError are never caught and the error messages seem to always point to managed_allocator::allocate or managed_allocator::deallocate...
  • Loading branch information
GPMueller committed Jan 26, 2018
1 parent 953b3f7 commit 8dd8d35
Show file tree
Hide file tree
Showing 9 changed files with 163 additions and 102 deletions.
4 changes: 2 additions & 2 deletions CMake/CompilerFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
message(FATAL_ERROR "GCC version must be at least 5.1!")
endif()
### Compiler Flags
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2 -std=c++11 -DEIGEN_NO_DEBUG" )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -std=c++11 -DEIGEN_NO_DEBUG" )
### Linker Flags
if (APPLE)
set( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -DEIGEN_NO_DEBUG -Wl,-no_compact_unwind -pthread" )
Expand Down Expand Up @@ -54,7 +54,7 @@ elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel")
### Message
MESSAGE( STATUS ">> Chose compiler: Intel" )
### Compiler Flags
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O3 -std=c++11 -DEIGEN_NO_DEBUG" )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -std=c++11 -DEIGEN_NO_DEBUG" )
### Linker Flags
if (APPLE)
set( CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -DEIGEN_NO_DEBUG -Wl,-no_compact_unwind -pthread" )
Expand Down
43 changes: 16 additions & 27 deletions core/include/engine/Managed_Allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,24 @@

#ifdef USE_CUDA

#include <utility/Exception.hpp>

#include <cuda.h>
#include <cuda_runtime.h>

#include <stdio.h>

// static void HandleError( cudaError_t err, const char *file, int line )
// {
// // CUDA error handeling from the "CUDA by example" book
// if (err != cudaSuccess)
// {
// printf( "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
// exit( EXIT_FAILURE );
// }
// }
static void CudaHandleError( cudaError_t err, const char *file, int line, const std::string & function)
{
if (err != cudaSuccess)
{
throw Utility::S_Exception(Utility::Exception_Classifier::CUDA_Error, Utility::Log_Level::Severe,
std::string(cudaGetErrorString( err )), file, line, function);
}
}

#define CU_HANDLE_ERROR( err ) (CudaHandleError( err, __FILE__, __LINE__, __func__ ))

// #define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
#define CU_CHECK_ERROR( ) (CudaHandleError( cudaGetLastError(), __FILE__, __LINE__, __func__ ))


template<class T>
Expand All @@ -27,39 +29,26 @@ class managed_allocator : public std::allocator<T>
public:
using value_type = T;


template<typename _Tp1>
struct rebind
{
typedef managed_allocator<_Tp1> other;
};


value_type* allocate(size_t n)
{
value_type* result = nullptr;

cudaError_t err = cudaMallocManaged(&result, n*sizeof(T), cudaMemAttachGlobal);
if (err != cudaSuccess)
{
printf( "%s in %s at line %d\n", cudaGetErrorString( err ), __FILE__, __LINE__ );
exit( EXIT_FAILURE );
}

CU_HANDLE_ERROR( cudaMallocManaged(&result, n*sizeof(value_type)) );

return result;
}

void deallocate(value_type* ptr, size_t)
{
cudaError_t err = cudaFree(ptr);
if (err != cudaSuccess)
{
printf( "%s in %s at line %d\n", cudaGetErrorString( err ), __FILE__, __LINE__ );
exit( EXIT_FAILURE );
}
CU_HANDLE_ERROR( cudaFree(ptr) );
}


managed_allocator() throw(): std::allocator<T>() { } //fprintf(stderr, "Hello managed allocator!\n"); }
managed_allocator(const managed_allocator &a) throw(): std::allocator<T>(a) { }
template <class U>
Expand Down
5 changes: 3 additions & 2 deletions core/include/utility/Exception.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ namespace Utility
Non_existing_Chain,
Input_parse_failed,
Bad_File_Content,
Standard_Exception,
Unknown_Exception
Standard_Exception,
CUDA_Error,
Unknown_Exception
// TODO: from Chain.cpp
// Last image deletion ?
// Empty clipboard ?
Expand Down
20 changes: 10 additions & 10 deletions core/src/engine/Hamiltonian_Heisenberg_Neighbours.cu
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ namespace Engine
if (this->idx_dmi >=0 ) E_DMI(spins, energy_contributions_per_spin[idx_dmi].second);
// DDI
if (this->idx_ddi >=0 ) E_DDI(spins, energy_contributions_per_spin[idx_ddi].second);

CU_CHECK_ERROR();
CU_HANDLE_ERROR( cudaDeviceSynchronize() );
}


Expand All @@ -159,7 +162,7 @@ namespace Engine
{
int ispin = idx + ibasis;
if ( cu_check_atom_type(atom_types[ispin]) )
atomicAdd(&Energy[ispin], - mu_s[ibasis] * external_field_magnitude * external_field_normal.dot(spins[ispin]));
Energy[ispin] -= mu_s[ibasis] * external_field_magnitude * external_field_normal.dot(spins[ispin]);
}
}
}
Expand All @@ -179,7 +182,7 @@ namespace Engine
{
int ispin = idx + anisotropy_indices[iani];
if ( cu_check_atom_type(atom_types[ispin]) )
atomicAdd(&Energy[ispin], - anisotropy_magnitude[idx] * std::pow(anisotropy_normal[idx].dot(spins[ispin]), 2.0));
Energy[ispin] -= anisotropy_magnitude[iani] * std::pow(anisotropy_normal[iani].dot(spins[ispin]), 2.0);
}
}
}
Expand Down Expand Up @@ -291,6 +294,9 @@ namespace Engine
this->Gradient_DMI(spins, gradient);
// DD
this->Gradient_DDI(spins, gradient);

CU_CHECK_ERROR();
CU_HANDLE_ERROR( cudaDeviceSynchronize() );
}

__global__ void HNeigh_CU_Gradient_Zeeman( const int * atom_types, const int n_cell_atoms, const scalar * mu_s, const scalar external_field_magnitude, const Vector3 external_field_normal, Vector3 * gradient, size_t n_cells_total)
Expand All @@ -304,10 +310,7 @@ namespace Engine
int ispin = idx + ibasis;
if ( cu_check_atom_type(atom_types[ispin]) )
{
for (int dim=0; dim<3 ; dim++)
{
atomicAdd(&gradient[ispin][dim], - mu_s[ibasis] * external_field_magnitude*external_field_normal[idx]);
}
gradient[ispin] -= mu_s[ibasis] * external_field_magnitude*external_field_normal;
}
}
}
Expand All @@ -330,10 +333,7 @@ namespace Engine
if ( cu_check_atom_type(atom_types[ispin]) )
{
scalar sc = -2 * anisotropy_magnitude[iani] * anisotropy_normal[iani].dot(spins[ispin]);
for (int dim=0; dim<3 ; dim++)
{
atomicAdd(&gradient[ispin][dim], sc*anisotropy_normal[iani][dim]);
}
gradient[ispin] += sc*anisotropy_normal[iani];
}
}
}
Expand Down
20 changes: 8 additions & 12 deletions core/src/engine/Hamiltonian_Heisenberg_Pairs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ namespace Engine
// Quadruplets
if (this->idx_quadruplet >=0 ) E_Quadruplet(spins, contributions[idx_quadruplet].second);

cudaDeviceSynchronize();
CU_CHECK_ERROR();
CU_HANDLE_ERROR( cudaDeviceSynchronize() );
}


Expand All @@ -145,7 +146,7 @@ namespace Engine
{
int ispin = idx + ibasis;
if ( cu_check_atom_type(atom_types[ispin]) )
atomicAdd(&Energy[ispin], - mu_s[ibasis] * external_field_magnitude * external_field_normal.dot(spins[ispin]));
Energy[ispin] -= mu_s[ibasis] * external_field_magnitude * external_field_normal.dot(spins[ispin]);
}
}
}
Expand All @@ -166,7 +167,7 @@ namespace Engine
{
int ispin = idx + anisotropy_indices[iani];
if ( cu_check_atom_type(atom_types[ispin]) )
atomicAdd(&Energy[ispin], - anisotropy_magnitude[idx] * std::pow(anisotropy_normal[idx].dot(spins[ispin]), 2.0));
Energy[ispin] -= anisotropy_magnitude[iani] * std::pow(anisotropy_normal[iani].dot(spins[ispin]), 2.0);
}
}
}
Expand Down Expand Up @@ -328,7 +329,8 @@ namespace Engine
// Quadruplet
this->Gradient_Quadruplet(spins, gradient);

cudaDeviceSynchronize();
CU_CHECK_ERROR();
CU_HANDLE_ERROR( cudaDeviceSynchronize() );
}


Expand All @@ -343,10 +345,7 @@ namespace Engine
int ispin = idx + ibasis;
if ( cu_check_atom_type(atom_types[ispin]) )
{
for (int dim=0; dim<3 ; dim++)
{
atomicAdd(&gradient[ispin][dim], - mu_s[ibasis] * external_field_magnitude*external_field_normal[idx]);
}
gradient[ispin] -= mu_s[ibasis] * external_field_magnitude*external_field_normal;
}
}
}
Expand All @@ -370,10 +369,7 @@ namespace Engine
if ( cu_check_atom_type(atom_types[ispin]) )
{
scalar sc = -2 * anisotropy_magnitude[iani] * anisotropy_normal[iani].dot(spins[ispin]);
for (int dim=0; dim<3 ; dim++)
{
atomicAdd(&gradient[ispin][dim], sc*anisotropy_normal[iani][dim]);
}
gradient[ispin] += sc*anisotropy_normal[iani];
}
}
}
Expand Down
9 changes: 6 additions & 3 deletions core/src/engine/Manifoldmath.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ namespace Engine
scalar proj=Vectormath::dot(vf1, vf2);
// Project vf1
cu_project_orthogonal<<<(n+1023)/1024, 1024>>>(vf1.data(), vf2.data(), proj, n);
cudaDeviceSynchronize();
CU_CHECK_ERROR();
CU_HANDLE_ERROR( cudaDeviceSynchronize() );
}

void invert_parallel(vectorfield & vf1, const vectorfield & vf2)
Expand Down Expand Up @@ -82,7 +83,8 @@ namespace Engine
{
int n = vf1.size();
cu_project_tangential<<<(n+1023)/1024, 1024>>>(vf1.data(), vf2.data(), n);
cudaDeviceSynchronize();
CU_CHECK_ERROR();
CU_HANDLE_ERROR( cudaDeviceSynchronize() );
}


Expand Down Expand Up @@ -124,7 +126,8 @@ namespace Engine
scalarfield sf(n);

cu_dist_geodesic_2<<<(n+1023)/1024, 1024>>>(vf1.data(), vf2.data(), sf.data(), n);
cudaDeviceSynchronize();
CU_CHECK_ERROR();
CU_HANDLE_ERROR( cudaDeviceSynchronize() );
scalar dist = Vectormath::sum(sf);

return sqrt(dist);
Expand Down
2 changes: 1 addition & 1 deletion core/src/engine/Vectormath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ namespace Engine
}// endfor j
}// endfor dim

};// end Build_Spins
}// end Build_Spins


std::array<scalar,3> Magnetization(const vectorfield & vf)
Expand Down

0 comments on commit 8dd8d35

Please sign in to comment.