# This notebook uses the PyTorch kernel!

### Verify that the correct Conda environment is active and PyTorch is GPU-ready (if this check doesn't pass, fix the setup before moving forward)

In [2]:
import torch
import subprocess

def is_cuda_installed(timeout=30.0):
    """
    Check whether the `nvidia-smi` command can be executed successfully.

    Despite the function's name, this probes the `nvidia-smi` utility only;
    it does not inspect a CUDA toolkit installation.

    Args:
        timeout (float | None): Maximum number of seconds to wait for
            `nvidia-smi` to finish. `None` waits indefinitely.

    Returns:
        Tuple: (bool, str)
            - True if `nvidia-smi` ran and exited with status 0, False otherwise.
            - Standard output of `nvidia-smi` on success, otherwise a
              non-empty message describing why the check failed.
    """
    try:
        result = subprocess.run(
            ['nvidia-smi'],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=timeout,
        )
    except FileNotFoundError:
        return False, "nvidia-smi not found. Ensure the NVIDIA drivers are properly installed."
    except subprocess.TimeoutExpired:
        # Report a hung tool as a failed check instead of blocking the notebook.
        return False, f"nvidia-smi did not finish within {timeout} seconds."
    except OSError as exc:
        # Any other launch failure (e.g. PermissionError) is also a failed check,
        # not a traceback: callers expect the (bool, str) contract to hold.
        return False, f"nvidia-smi could not be executed: {exc}"

    if result.returncode == 0:
        return True, result.stdout

    # Prefer stderr, but fall back to stdout and finally to the exit status so
    # the caller always receives a non-empty diagnostic.
    failure_reason = (
        result.stderr
        or result.stdout
        or f"nvidia-smi exited with status {result.returncode}."
    )
    return False, failure_reason

# Check GPU, CUDA, and PyTorch setup
def run_gpu_test():
    """
    Checks for an available GPU, prints useful info to stdout, and runs a simple test.

    Three stages are run in order, and the first failure stops the check:
      1. `torch.cuda.is_available()` must report a usable CUDA device.
      2. `is_cuda_installed()` must succeed in running `nvidia-smi`.
      3. A matrix multiplication must run to completion on the GPU.

    Returns:
        bool: True if every stage passes, False otherwise.
    """
    # Check if CUDA is available
    if not torch.cuda.is_available():
        print("GPU is not available")
        return False

    print("\nGPU is available")
    # Both lines below report `torch.version.cuda`, i.e. the CUDA version this
    # PyTorch build was compiled against. It may differ from the "CUDA Version"
    # that `nvidia-smi` prints further down.
    print("CUDA version:", torch.version.cuda)
    print("PyTorch CUDA version:", torch.version.cuda)
    print("Number of GPUs:", torch.cuda.device_count())
    print("Device name:", torch.cuda.get_device_name(0))

    # Check `nvidia-smi`
    installed, output = is_cuda_installed()
    if not installed:
        print("\n`nvidia-smi` is not installed or not found.\n"
              "Please see https://docs.nvidia.com/cuda/cuda-installation-guide-linux/")
        return False
    print("\n`nvidia-smi` is available:\n")
    print(output)

    # Run a simple PyTorch tensor operation on GPU
    print("Running a simple tensor operation on the GPU...")
    try:
        x = torch.rand(10000, 10000, device='cuda')
        torch.mm(x, x)
        # CUDA kernels are launched asynchronously: `torch.mm` returns once the
        # work is queued. Wait for the device to finish so that a failure during
        # execution is raised here, inside the `try`, rather than being reported
        # as a pass.
        torch.cuda.synchronize()
        print("GPU test passed successfully!")
        return True
    except Exception as e:
        print(f"GPU test failed: {e}")
        return False

# Run the test. As the last expression in the cell, the returned bool is shown
# as the cell's output (True when every check passed, False otherwise).
run_gpu_test()



GPU is available
CUDA version: 11.8
PyTorch CUDA version: 11.8
Number of GPUs: 1
Device name: NVIDIA GeForce RTX 3070

`nvidia-smi` is available:

Sun Nov 24 13:17:40 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.127.08             Driver Version: 550.127.08     CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3070        Off |   00000000:01:00.0  On |                  N/A |
|  0%   53C    P8             19W /  220W |    1263MiB /   8192MiB |     12%      Default |
|                                         |                        |                

True