Skip to content

Windows PCI (or at least CUDA) locality #108

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
ompiteam opened this issue Sep 12, 2014 · 1 comment
Open

Windows PCI (or at least CUDA) locality #108

ompiteam opened this issue Sep 12, 2014 · 1 comment
Assignees
Milestone

Comments

@ompiteam
Copy link
Contributor

Based on http://www.open-mpi.org/community/lists/hwloc-users/2013/11/0926.php

Also useful: http://blogs.technet.com/b/winserverperformance/archive/2008/09/13/getting-system-topology-information-on-windows.aspx

This code should map a CUdevice to a NUMA node (by enumerating all PCI devices). I have not compiled the code in this form or tested it as is, but the calls should work fine for mapping any CUDA device to the OS enumeration with respect to PCI device location:

{{{
long GetNumaNode(CUdevice dev)
{
BOOL ret;
DWORD lastError;
// get the cuda device string
char cuDevString[CUDA_DEV_STRING_LEN];
unsigned long cudaBus;
unsigned long cudaSubdevice;
unsigned long cudaFunction;
CUresult status = cuDeviceGetPCIBusId(cuDevString, CUDA_DEV_STRING_LEN, dev);
assert(CUDA_SUCCESS == status);
if (CUDA_SUCCESS != status) {
return 0;
}
char *tmp;
char *tmp2;
char del[] = ":.";
// remove domain
tmp = strtok_s(cuDevString, del, &tmp2);
// get bus
tmp = strtok_s(NULL, del, &tmp2);
sscanf_s(tmp, "%x", &cudaBus);
// get subdevice
tmp = strtok_s(NULL, del, &tmp2);
sscanf_s(tmp, "%x", &cudaSubdevice);
// get function
tmp = strtok_s(NULL, del, &tmp2);
sscanf_s(tmp, "%x", &cudaFunction);
// Use NULL as the first parameter as we need to look at non display devices too
HDEVINFO hNvDevInfo = SetupDiGetClassDevs(NULL, NULL, NULL, DIGCF_PRESENT | DIGCF_ALLCLASSES);
if(hNvDevInfo == INVALID_HANDLE_VALUE)
{
assert(!"INVALID_HANDLE_VALUE");
return 0;
}
// Find the deviceInfoData for each GPU
DWORD deviceIndex;
for (deviceIndex = 0; ; deviceIndex++)
{
SP_DEVINFO_DATA deviceInfoData;
unsigned long bus;
unsigned long subdevice;
unsigned long function;
deviceInfoData.cbSize = sizeof(SP_DEVINFO_DATA);
ret = SetupDiEnumDeviceInfo(hNvDevInfo, deviceIndex, &deviceInfoData);
if (!ret)
{
// MSDN says:
// call SetupDiEnumDeviceInfo until there are no more values (the function fails and a call
// to GetLastError returns ERROR_NO_MORE_ITEMS).
lastError = GetLastError();
assert(lastError == ERROR_NO_MORE_ITEMS);
break;
}

    char locinfo[256];
    ret = SetupDiGetDeviceRegistryPropertyA(hNvDevInfo, &deviceInfoData, SPDRP_LOCATION_INFORMATION, NULL,
        (PBYTE)locinfo, sizeof(locinfo), NULL);
    if (!ret)
    {
        lastError = GetLastError();
    }
    bool dataSet = false;
    if (strncmp(locinfo, "PCI", 3) == 0) {
        char *busString = strstr(locinfo, "bus");
        if (busString) {
            busString += 3;
            char *deviceString = strstr(locinfo, ",");
            if (deviceString) {
                deviceString[0] = 0;
                bus = atoi(busString);
                deviceString++;
                deviceString = strstr(deviceString, "device");
                if (deviceString) {
                    deviceString+=6;
                    char *functionStr = strstr(deviceString, ",");
                    if (functionStr) {
                        functionStr[0] = 0;
                        subdevice = atoi(deviceString);
                        functionStr++;
                        functionStr = strstr(functionStr, "function");
                        if (functionStr) {
                            functionStr+=8;
                            function = atoi(functionStr);
                            dataSet = true;
                        }
                    }
                }
            }
        }
    }
    if (dataSet &&
        (bus == cudaBus) &&
        (subdevice == cudaSubdevice) &&
        (function == cudaFunction))
    {
        ret = SetupDiGetDeviceRegistryPropertyA(hNvDevInfo, &deviceInfoData, SPDRP_HARDWAREID, NULL,
            (PBYTE)locinfo, sizeof(locinfo), NULL);
        printf("locinfo %s\n", locinfo);
        int data[20];
        data[0] = 0;
        DEVPROPTYPE type;
        DEVPROPKEY key = DEVPKEY_Numa_Proximity_Domain;
        lastError = 0;
        ret =  SetupDiGetDeviceProperty(hNvDevInfo, &deviceInfoData,&key , &type, (PBYTE)&data[0], 20*sizeof(int), NULL,0);
        if (!ret)
        {
            lastError = GetLastError();
        }
        printf("DEVPKEY_Numa_Proximity_Domain %d err %d\n", data[0], lastError);
        key = DEVPKEY_Device_Numa_Node;
        lastError = 0;
        ret =  SetupDiGetDeviceProperty(hNvDevInfo, &deviceInfoData,&key , &type, (PBYTE)&data[0], 20*sizeof(int), NULL,0);
        if (!ret)
        {
            lastError = GetLastError();
        }
        printf("DEVPKEY_Device_Numa_Node %d err %d\n", data[0], lastError);
        return data[0];
    }
}
return -1;

}

@ompiteam ompiteam self-assigned this Sep 12, 2014
@ompiteam ompiteam added this to the Future milestone Sep 12, 2014
@ompiteam
Copy link
Contributor Author

Imported from trac issue 107. Created by bgoglin on 2013-11-27T10:52:41, last modified: 2013-11-27T10:52:41

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants