GPU Availability Test Notebook

By Nethali Zoysa (2024/08/02)

First, let's install the necessary libraries

In [None]:
!pip install torch numpy matplotlib py3nvml

Import required libraries and check if CUDA (GPU support) is available

In [None]:
import torch

# Ask PyTorch whether a usable CUDA device is visible, then report the answer.
cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)

If CUDA is available, print some information about the GPU and time a large matrix multiplication on it; otherwise, run the same multiplication on the CPU

In [None]:
import time

# Report the visible CUDA device(s) and time one large matrix multiplication:
# on the GPU when CUDA is available, on the CPU otherwise.
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))
    print("Number of GPUs:", torch.cuda.device_count())

    # Create large tensors on the GPU to test performance
    device = torch.device("cuda")
    x = torch.randn(10000, 10000, device=device)
    y = torch.randn(10000, 10000, device=device)

    # Warm-up: run the multiplication once untimed and wait for it to finish,
    # so one-time start-up work triggered by the first matmul is not counted
    # in the measurement below. The result is discarded immediately.
    torch.matmul(x, y)
    torch.cuda.synchronize()

    # CUDA work is queued asynchronously, so time it with CUDA events recorded
    # on the stream rather than with a host-side clock.
    start_time = torch.cuda.Event(enable_timing=True)
    end_time = torch.cuda.Event(enable_timing=True)

    start_time.record()
    z = torch.matmul(x, y)
    end_time.record()

    # Wait for GPU computation to finish before reading the elapsed time
    torch.cuda.synchronize()

    print(f"GPU Matrix multiplication time: {start_time.elapsed_time(end_time):.2f} ms")
else:
    print("CUDA is not available. Using CPU instead.")

    # Create large tensors on the CPU to test performance
    x = torch.randn(10000, 10000)
    y = torch.randn(10000, 10000)

    # Time the multiplication with perf_counter(): it is monotonic and
    # high-resolution, unlike time.time(), which can jump if the system clock
    # is adjusted while the benchmark runs.
    start_time = time.perf_counter()
    z = torch.matmul(x, y)
    end_time = time.perf_counter()

    print(f"CPU Matrix multiplication time: {(end_time - start_time) * 1000:.2f} ms")

Get GPU details (name, memory in use, temperature and power draw) through NVML

In [None]:
import py3nvml

# Query NVML (through py3nvml) for per-GPU name, used memory, temperature and
# power draw. Skipped entirely when PyTorch reports no CUDA device.
if torch.cuda.is_available():
    nvml = py3nvml.py3nvml
    nvml.nvmlInit()
    try:
        # Get the number of GPUs
        num_gpus = nvml.nvmlDeviceGetCount()

        # Loop through all GPUs
        for i in range(num_gpus):
            handle = nvml.nvmlDeviceGetHandleByIndex(i)
            print(f"GPU {i}: {nvml.nvmlDeviceGetName(handle)}")
            # Memory line shows the *used* amount, scaled by 1024**2
            print(f"Memory: {nvml.nvmlDeviceGetMemoryInfo(handle).used / 1024**2:.2f} MB")
            print(f"Temperature: {nvml.nvmlDeviceGetTemperature(handle, nvml.NVML_TEMPERATURE_GPU)} C")
            # Power reading is divided by 1000 before being labelled as watts
            print(f"Power: {nvml.nvmlDeviceGetPowerUsage(handle) / 1000:.2f} W")
            print()
    finally:
        # Always release NVML, even if one of the queries above raised.
        nvml.nvmlShutdown()
else:
    print("Skipped since GPU is not available")

Get more GPU details from the `nvidia-smi` command

In [None]:
import subprocess

# Print the full nvidia-smi report when a CUDA device is available.
if torch.cuda.is_available():
    try:
        # Use nvidia-smi command to get details
        details = subprocess.check_output(['nvidia-smi'])
    except FileNotFoundError:
        # The driver tools may be absent from PATH even when CUDA works.
        print("nvidia-smi was not found on PATH")
    except subprocess.CalledProcessError as err:
        print(f"nvidia-smi failed with exit status {err.returncode}")
    else:
        # errors='replace' keeps the report printable if the tool emits
        # bytes that are not valid UTF-8.
        print(details.decode('utf-8', errors='replace'))
else:
    print("Skipped since GPU is not available")

Test NumPy (CPU) performance and matplotlib plotting

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Time a NumPy matrix multiplication and draw a sample plot to confirm that
# NumPy and matplotlib work in this environment.
# NOTE(review): nothing in this branch uses the GPU; the CUDA gate is kept so
# the cell behaves as before — confirm whether it should run unconditionally.
if torch.cuda.is_available():
    # Test NumPy performance (CPU-based)
    a = np.random.randn(5000, 5000)  # Reduced size for faster execution
    b = np.random.randn(5000, 5000)  # Reduced size for faster execution

    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    c = np.dot(a, b)
    end_time = time.perf_counter()

    print(f"NumPy matrix multiplication time: {(end_time - start_time) * 1000:.2f} ms")

    # Create a simple plot to test matplotlib, using an explicit figure/axes
    # pair instead of pyplot's implicit "current figure" state.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(np.random.randn(1000).cumsum())
    ax.set_title("Random Walk")
    ax.set_xlabel("Step")
    ax.set_ylabel("Value")
    plt.show()
else:
    print("Skipped since GPU is not available")

print("Notebook execution completed successfully!")