## MIG Configuration Workflow (Admin)

```bash
# Step 1: Enable MIG mode on GPU 0 (requires GPU reset)
# The GPU must be idle: stop any processes using it before resetting.
sudo nvidia-smi -i 0 -mig 1
sudo nvidia-smi -i 0 --gpu-reset

# Step 2: List available profiles
nvidia-smi mig -lgip

# Step 3: Create GPU instances
# Example: Create two 3g.40gb instances on H100
# (profile ID 9 here; confirm the ID for your GPU in the step 2 output)
sudo nvidia-smi mig -cgi 9,9 -i 0

# Step 4: Create compute instances for each GPU instance
# GPU instance IDs are assigned by the driver and are printed by step 3
# (also visible via `nvidia-smi mig -lgi`). They do not necessarily start
# at 0; two 3g instances typically receive IDs 1 and 2. Adjust -gi to match.
sudo nvidia-smi mig -cci -gi 1 -i 0
sudo nvidia-smi mig -cci -gi 2 -i 0

# Step 5: List created instances
nvidia-smi mig -lgi
nvidia-smi mig -lci
```

## Query MIG Configuration with NVML

In [None]:
%%writefile mig_config_query.cu
#include <stdio.h>
#include <cuda_runtime.h>
#include <nvml.h>

// Evaluates an NVML call exactly once. If the status is anything other than
// NVML_SUCCESS, prints "NVML: <error string>" and leaves the enclosing
// function with a bare `return;`.
// Because the return carries no value, this macro is only valid inside
// functions whose return type is void.
#define CHECK_NVML(call) \
    do { \
        nvmlReturn_t result = call; \
        if (result != NVML_SUCCESS) { \
            printf("NVML: %s\n", nvmlErrorString(result)); \
            return; \
        } \
    } while(0)

void queryMIGConfiguration(unsigned int deviceIdx) {
    nvmlDevice_t device;
    CHECK_NVML(nvmlDeviceGetHandleByIndex(deviceIdx, &device));
    
    char name[NVML_DEVICE_NAME_BUFFER_SIZE];
    CHECK_NVML(nvmlDeviceGetName(device, name, sizeof(name)));
    printf("\n=== Device %u: %s ===\n", deviceIdx, name);
    
    // Check MIG mode
    unsigned int currentMode, pendingMode;
    nvmlReturn_t migResult = nvmlDeviceGetMigMode(device, &currentMode, &pendingMode);
    
    if (migResult == NVML_ERROR_NOT_SUPPORTED) {
        printf("MIG not supported on this GPU\n");
        return;
    }
    CHECK_NVML(migResult);
    
    printf("MIG Mode: %s (pending: %s)\n",
           currentMode ? "ENABLED" : "DISABLED",
           pendingMode ? "ENABLED" : "DISABLED");
    
    if (!currentMode) {
        printf("MIG is disabled. Enable with: sudo nvidia-smi -mig 1\n");
        return;
    }
    
    // List GPU instance profiles
    printf("\nAvailable GPU Instance Profiles:\n");
    for (unsigned int profileId = 0; profileId < 20; profileId++) {
        nvmlGpuInstanceProfileInfo_t profileInfo;
        nvmlReturn_t result = nvmlDeviceGetGpuInstanceProfileInfo(
            device, profileId, &profileInfo);
        
        if (result == NVML_SUCCESS) {
            printf("  Profile %u: memory=%llu MB, slices=%u, instances=%u\n",
                   profileId,
                   profileInfo.memorySizeMB,
                   profileInfo.sliceCount,
                   profileInfo.instanceCount);
        }
    }
    
    // List existing GPU instances
    printf("\nExisting GPU Instances:\n");
    unsigned int count = 0;
    nvmlGpuInstance_t gpuInstances[NVML_MAX_GPU_INSTANCES];
    
    // Try to get instances for each profile
    for (unsigned int profileId = 0; profileId < 20; profileId++) {
        nvmlGpuInstanceProfileInfo_t profileInfo;
        if (nvmlDeviceGetGpuInstanceProfileInfo(device, profileId, &profileInfo) == NVML_SUCCESS) {
            unsigned int instCount;
            if (nvmlDeviceGetGpuInstances(device, profileId, gpuInstances, &instCount) == NVML_SUCCESS) {
                for (unsigned int i = 0; i < instCount; i++) {
                    nvmlGpuInstanceInfo_t info;
                    if (nvmlGpuInstanceGetInfo(gpuInstances[i], &info) == NVML_SUCCESS) {
                        printf("  Instance %u: id=%u, profileId=%u\n",
                               count++, info.id, info.profileId);
                    }
                }
            }
        }
    }
    
    if (count == 0) {
        printf("  No GPU instances created\n");
    }
}

// Entry point: initializes NVML, prints the MIG configuration of every GPU
// NVML reports, then shuts NVML down.
// Returns 0 on success, 1 if NVML cannot be initialized or the device count
// cannot be read.
int main() {
    printf("=== MIG Configuration Query ===\n");

    nvmlReturn_t result = nvmlInit();
    if (result != NVML_SUCCESS) {
        printf("Failed to initialize NVML: %s\n", nvmlErrorString(result));
        return 1;
    }

    // Handled explicitly rather than via CHECK_NVML: that macro executes a bare
    // `return;`, which is not valid in a function returning int, and it would
    // also skip nvmlShutdown().
    unsigned int deviceCount = 0;
    result = nvmlDeviceGetCount(&deviceCount);
    if (result != NVML_SUCCESS) {
        printf("NVML: %s\n", nvmlErrorString(result));
        nvmlShutdown();
        return 1;
    }

    for (unsigned int i = 0; i < deviceCount; i++) {
        queryMIGConfiguration(i);
    }

    nvmlShutdown();
    return 0;
}

In [None]:
!nvcc -O3 -arch=sm_80 mig_config_query.cu -o mig_config_query -lnvidia-ml && ./mig_config_query

## Targeting Specific MIG Instances

When MIG is enabled, each instance has a unique UUID:

In [None]:
# List MIG devices with UUIDs
!nvidia-smi -L 2>/dev/null

In [None]:
%%writefile target_mig_instance.cu
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

// Evaluates a CUDA runtime call exactly once. If the result is anything other
// than cudaSuccess, prints "CUDA error: <error string>" and terminates the
// process with exit(1).
#define CHECK_CUDA(call) \
    do { \
        cudaError_t err = call; \
        if (err != cudaSuccess) { \
            printf("CUDA error: %s\n", cudaGetErrorString(err)); \
            exit(1); \
        } \
    } while(0)

// Stores the calling thread's global 1-D index into *result.
// Every thread writes to the same address, so the stored value is only
// well-defined when the kernel is launched with a single thread.
__global__ void simpleKernel(int* result) {
    const int globalThreadIdx = blockDim.x * blockIdx.x + threadIdx.x;
    result[0] = globalThreadIdx;
}

int main(int argc, char** argv) {
    printf("=== Target MIG Instance Demo ===\n\n");
    
    // Check environment
    const char* cvd = getenv("CUDA_VISIBLE_DEVICES");
    printf("CUDA_VISIBLE_DEVICES: %s\n", cvd ? cvd : "(not set)");
    
    int deviceCount;
    CHECK_CUDA(cudaGetDeviceCount(&deviceCount));
    printf("Available CUDA devices: %d\n\n", deviceCount);
    
    for (int i = 0; i < deviceCount; i++) {
        cudaDeviceProp prop;
        CHECK_CUDA(cudaGetDeviceProperties(&prop, i));
        
        printf("Device %d: %s\n", i, prop.name);
        printf("  Memory: %.2f GB\n", 
               prop.totalGlobalMem / (1024.0*1024.0*1024.0));
        printf("  SMs: %d\n", prop.multiProcessorCount);
        
        // Set device and run kernel
        CHECK_CUDA(cudaSetDevice(i));
        
        int* d_result;
        int h_result = -1;
        CHECK_CUDA(cudaMalloc(&d_result, sizeof(int)));
        
        simpleKernel<<<1, 1>>>(d_result);
        CHECK_CUDA(cudaDeviceSynchronize());
        CHECK_CUDA(cudaMemcpy(&h_result, d_result, sizeof(int), 
                               cudaMemcpyDeviceToHost));
        
        printf("  Kernel result: %d\n", h_result);
        CHECK_CUDA(cudaFree(d_result));
    }
    
    printf("\nTo target a specific MIG instance:\n");
    printf("  export CUDA_VISIBLE_DEVICES=MIG-<uuid>\n");
    printf("  ./program\n");
    
    return 0;
}

In [None]:
!nvcc -O3 -arch=sm_80 target_mig_instance.cu -o target_mig_instance && ./target_mig_instance

## MIG with CUDA MPS

MPS (Multi-Process Service) can run on MIG instances for even finer sharing:

```bash
# Start MPS on a specific MIG instance
# The variables are exported so that both the MPS control daemon and the
# client applications launched from this shell inherit the same settings.
# Replace MIG-<uuid> with a UUID reported by `nvidia-smi -L`.
export CUDA_VISIBLE_DEVICES=MIG-<uuid>
export CUDA_MPS_PIPE_DIRECTORY=/tmp/nvidia-mps-mig
export CUDA_MPS_LOG_DIRECTORY=/tmp/nvidia-mps-log

# -d starts the control daemon in the background
nvidia-cuda-mps-control -d

# Run multiple processes sharing the MIG instance
# Each app runs as a background job; `wait` blocks until all have exited.
./app1 &
./app2 &
./app3 &
wait

# Stop MPS
# Sends the "quit" command to the running control daemon.
echo quit | nvidia-cuda-mps-control
```

## Compute Instance Profiles

Each GPU Instance can be further divided into Compute Instances:

In [None]:
%%writefile compute_instance_query.cu
#include <stdio.h>
#include <nvml.h>

// Evaluates an NVML call exactly once. If the status is anything other than
// NVML_SUCCESS, prints "NVML: <error string>" and leaves the enclosing
// function with a bare `return;`.
// Because the return carries no value, this macro is only valid inside
// functions whose return type is void.
#define CHECK_NVML(call) \
    do { \
        nvmlReturn_t result = call; \
        if (result != NVML_SUCCESS) { \
            printf("NVML: %s\n", nvmlErrorString(result)); \
            return; \
        } \
    } while(0)

// Prints every compute instance profile that the given GPU instance offers
// with the shared engine profile.
//
// gpuInstance: NVML handle of the GPU instance to query.
// giId:        ID of that GPU instance; used only in the printed heading.
void listComputeInstanceProfiles(nvmlGpuInstance_t gpuInstance, unsigned int giId) {
    printf("  Compute Instance Profiles for GPU Instance %u:\n", giId);

    // The loop variable is the NVML profile enum
    // (NVML_COMPUTE_INSTANCE_PROFILE_*); profiles this GPU instance does not
    // offer return an error and are skipped quietly.
    for (unsigned int profile = 0; profile < NVML_COMPUTE_INSTANCE_PROFILE_COUNT; profile++) {
        nvmlComputeInstanceProfileInfo_t profileInfo;
        nvmlReturn_t result = nvmlGpuInstanceGetComputeInstanceProfileInfo(
            gpuInstance, profile, NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED,
            &profileInfo);

        if (result == NVML_SUCCESS) {
            // The SM count field is multiprocessorCount; the struct has no
            // member named smCount.
            printf("    Profile %u: SMs=%u, instances=%u\n",
                   profileInfo.id,
                   profileInfo.multiprocessorCount,
                   profileInfo.instanceCount);
        }
    }
}

int main() {
    printf("=== Compute Instance Profiles ===\n\n");
    
    if (nvmlInit() != NVML_SUCCESS) {
        printf("Failed to initialize NVML\n");
        return 1;
    }
    
    nvmlDevice_t device;
    CHECK_NVML(nvmlDeviceGetHandleByIndex(0, &device));
    
    unsigned int currentMode, pendingMode;
    if (nvmlDeviceGetMigMode(device, &currentMode, &pendingMode) != NVML_SUCCESS ||
        !currentMode) {
        printf("MIG not enabled. Showing theoretical profiles...\n\n");
        
        // Show what would be available
        printf("For 3g.40gb GPU Instance:\n");
        printf("  - 3c.3g.40gb: All 3 slices (full)\n");
        printf("  - 2c.3g.40gb: 2/3 compute slices\n");
        printf("  - 1c.3g.40gb: 1/3 compute slices\n");
        printf("\nFor 7g.80gb GPU Instance (A100/H100):\n");
        printf("  - 7c.7g.80gb: All compute (full)\n");
        printf("  - 4c.7g.80gb: 4/7 compute\n");
        printf("  - 3c.7g.80gb: 3/7 compute\n");
        printf("  - 2c.7g.80gb: 2/7 compute\n");
        printf("  - 1c.7g.80gb: 1/7 compute\n");
    } else {
        // Query actual instances
        printf("MIG enabled. Querying actual configuration...\n\n");
        
        nvmlGpuInstance_t gpuInstances[8];
        for (unsigned int profileId = 0; profileId < 20; profileId++) {
            unsigned int count;
            if (nvmlDeviceGetGpuInstances(device, profileId, gpuInstances, &count) == NVML_SUCCESS) {
                for (unsigned int i = 0; i < count; i++) {
                    nvmlGpuInstanceInfo_t info;
                    if (nvmlGpuInstanceGetInfo(gpuInstances[i], &info) == NVML_SUCCESS) {
                        listComputeInstanceProfiles(gpuInstances[i], info.id);
                    }
                }
            }
        }
    }
    
    nvmlShutdown();
    return 0;
}

In [None]:
!nvcc -O3 -arch=sm_80 compute_instance_query.cu -o compute_instance_query -lnvidia-ml && ./compute_instance_query

## MIG Configuration Reference

### H100 80GB MIG Configurations

| Configuration | GPU Instances | Memory per Instance |
|--------------|---------------|---------------------|
| 7 x 1g.10gb | 7 | 10GB each |
| 3 x 2g.20gb + 1g.10gb | 4 | 20GB, 20GB, 20GB, 10GB |
| 2 x 3g.40gb | 2 | 40GB each |
| 1 x 4g.40gb + 1 x 3g.40gb | 2 | 40GB, 40GB |
| 1 x 7g.80gb | 1 | 80GB (full) |

### A100 80GB MIG Configurations

| Configuration | GPU Instances |
|--------------|---------------|
| 7 x 1g.10gb | 7 instances |
| 3 x 2g.20gb + 1g.10gb | 4 instances |
| 2 x 3g.40gb | 2 instances |
| 1 x 7g.80gb | Full GPU |

## Key Takeaways

1. **GPU Instance** - Memory and SM partition
2. **Compute Instance** - Further subdivides SMs within a GPU Instance
3. **Use CUDA_VISIBLE_DEVICES** - Target specific MIG instances via UUID
4. **NVML API** - Query MIG configuration programmatically
5. **Admin required** - MIG configuration needs root access