Skip to content

Commit

Permalink
nvml: force 64-bit types for mem sizes
Browse files Browse the repository at this point in the history
size_t can be a bit... unpredictable on x86
  • Loading branch information
tpruvot committed Sep 27, 2016
1 parent 5a0b779 commit f84c83a
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 15 deletions.
14 changes: 8 additions & 6 deletions cuda.cpp
Expand Up @@ -218,16 +218,18 @@ void cuda_reset_device(int thr_id, bool *init)
int cuda_available_memory(int thr_id)
{
int dev_id = device_map[thr_id % MAX_GPUS];
size_t mtotal = 0, mfree = 0;
#if defined(_WIN32) && defined(USE_WRAPNVML)
uint64_t tot64 = 0, free64 = 0;
// cuda (6.5) one can crash on pascal and dont handle 8GB
nvapiMemGetInfo(dev_id, &mfree, &mtotal);
nvapiMemGetInfo(dev_id, &free64, &tot64);
return (int) (free64 / (1024 * 1024));
#else
size_t mtotal = 0, mfree = 0;
cudaSetDevice(dev_id);
cudaDeviceSynchronize();
cudaMemGetInfo(&mfree, &mtotal);
#endif
return (int) (mfree / (1024 * 1024));
#endif
}

// Check (and reset) last cuda error, and report it in logs
Expand All @@ -252,9 +254,9 @@ int cuda_gpu_info(struct cgpu_info *gpu)
{
cudaDeviceProp props;
if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) {
gpu->gpu_clock = props.clockRate;
gpu->gpu_memclock = props.memoryClockRate;
gpu->gpu_mem = (props.totalGlobalMem / 1024); // kB
gpu->gpu_clock = (uint32_t) props.clockRate;
gpu->gpu_memclock = (uint32_t) props.memoryClockRate;
gpu->gpu_mem = (uint64_t) (props.totalGlobalMem / 1024); // kB
#if defined(_WIN32) && defined(USE_WRAPNVML)
// required to get mem size > 4GB (size_t too small for bytes on 32bit)
nvapiMemGetInfo(gpu->gpu_id, &gpu->gpu_memfree, &gpu->gpu_mem); // kB
Expand Down
10 changes: 5 additions & 5 deletions miner.h
Expand Up @@ -367,10 +367,10 @@ struct cgpu_info {
uint16_t gpu_fan;
uint16_t gpu_fan_rpm;
uint16_t gpu_arch;
int gpu_clock;
int gpu_memclock;
size_t gpu_mem;
size_t gpu_memfree;
uint32_t gpu_clock;
uint32_t gpu_memclock;
uint64_t gpu_mem;
uint64_t gpu_memfree;
uint32_t gpu_power;
double gpu_vddc;
int16_t gpu_pstate;
Expand All @@ -383,7 +383,7 @@ struct cgpu_info {

char gpu_sn[64];
char gpu_desc[64];
float intensity;
double intensity;
uint32_t throughput;
};

Expand Down
6 changes: 3 additions & 3 deletions nvml.cpp
Expand Up @@ -1657,15 +1657,15 @@ int nvapi_set_memclock(unsigned int devNum, uint32_t clock)
}

// Replacement for WIN32 CUDA 6.5 on pascal
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total)
int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total)
{
NvAPI_Status ret = NVAPI_OK;
NV_DISPLAY_DRIVER_MEMORY_INFO mem = { 0 };
mem.version = NV_DISPLAY_DRIVER_MEMORY_INFO_VER;
unsigned int devNum = nvapi_dev_map[dev_id % MAX_GPUS];
if ((ret = NvAPI_GPU_GetMemoryInfo(phys[devNum], &mem)) == NVAPI_OK) {
*total = mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
*free = mem.curAvailableDedicatedVideoMemory;
*total = (uint64_t) mem.dedicatedVideoMemory;// mem.availableDedicatedVideoMemory;
*free = (uint64_t) mem.curAvailableDedicatedVideoMemory;
}
return (int) ret;
}
Expand Down
2 changes: 1 addition & 1 deletion nvml.h
Expand Up @@ -231,7 +231,7 @@ unsigned int nvapi_devnum(int dev_id);
int nvapi_devid(unsigned int devNum);

// cuda Replacement for 6.5 compat
int nvapiMemGetInfo(int dev_id, size_t *free, size_t *total);
int nvapiMemGetInfo(int dev_id, uint64_t *free, uint64_t *total);
#endif

#endif /* USE_WRAPNVML */
Expand Down

0 comments on commit f84c83a

Please sign in to comment.