# Tests with the eGPU

Table of contents:

- [Basic Checks](#basic-checks)
- [Basic Sequential Neural Network](#basic-sequential-neural-network)
- [MNIST Training Test](#mnist-training-test)

## Basic Checks

In [1]:
import torch

# How many GPUs?
print("CUDA available:", torch.cuda.is_available())
print("GPU count:", torch.cuda.device_count())

# Name of each GPU
for i in range(torch.cuda.device_count()):
    print(i, torch.cuda.get_device_name(i))

# Memory usage
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Memory allocated:", torch.cuda.memory_allocated(device) / 1024**2, "MB")
print("Max memory allocated:", torch.cuda.max_memory_allocated(device) / 1024**2, "MB")
print("Memory reserved:", torch.cuda.memory_reserved(device) / 1024**2, "MB")

# Clear cache
torch.cuda.empty_cache()

CUDA available: True
GPU count: 2
0 NVIDIA GeForce RTX 3060
1 NVIDIA T500
Memory allocated: 0.0 MB
Max memory allocated: 0.0 MB
Memory reserved: 0.0 MB


In [2]:
# Dosplay CLI tool output nvidia-smi
!nvidia-smi

# For live tracking in Terminal, use built-in loop option
# nvidia-smi -l 1

Mon Oct 20 20:40:30 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.65.06              Driver Version: 580.65.06      CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA T500                    Off |   00000000:01:00.0 Off |                  N/A |
| N/A   47C    P8            N/A  / 5001W |       5MiB /   4096MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA GeForce RTX 3060        Off |   00

## Basic Sequential Neural Network

In [3]:
import time
import torch
import torch.nn as nn
import torch.optim as optim

if torch.cuda.is_available():
    # Pick device (force RTX 3060, index 0)
    device = torch.device("cuda:0")
    print("Using device:", device, torch.cuda.get_device_name(device))
elif torch.backends.mps.is_available():
    # MacOS
    device = torch.device("mps")
    print("Using device:", device)
else:
    # CPU fallback :(
    device = torch.device("cpu")
    print("Using device:", device)

# Dummy dataset: 1,000,000 samples, 100 features, 10 classes
X = torch.randn(1000000, 100, device=device)
y = torch.randint(0, 10, (1000000,), device=device)
# Size
size_bytes = X.numel() * X.element_size() + y.numel() * y.element_size()
print(f"Dataset size (approx): {size_bytes / 1024**2:.2f} MB")

# Simple MLP
model = nn.Sequential(
    nn.Linear(100, 256),
    nn.ReLU(),
    nn.Linear(256, 10)
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

Using device: cuda:0 NVIDIA GeForce RTX 3060
Dataset size (approx): 389.10 MB


In [4]:
# Training loop, timed
start_time = time.time()
for epoch in range(5):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")
end_time = time.time()
print(f"Training completed in {end_time - start_time:.2f} seconds")

Epoch 1, Loss: 2.3335
Epoch 2, Loss: 2.3244
Epoch 3, Loss: 2.3190
Epoch 4, Loss: 2.3161
Epoch 5, Loss: 2.3143
Training completed in 0.55 seconds


## MNIST Training Test

In [5]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

if torch.cuda.is_available():
    # Pick device (force RTX 3060, index 0)
    device = torch.device("cuda:0")
    print("Using device:", device, torch.cuda.get_device_name(device))
elif torch.backends.mps.is_available():
    # MacOS
    device = torch.device("mps")
    print("Using device:", device)
else:
    # CPU fallback :(
    device = torch.device("cpu")
    print("Using device:", device)

# Load MNIST
transform = transforms.Compose([transforms.ToTensor()])
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
# Size
size_bytes = trainloader.dataset.data.numel() * trainloader.dataset.data.element_size()
print(f"Dataset size (approx, no targets): {size_bytes / 1024**2:.2f} MB")

# Simple CNN
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.fc1 = None   # we’ll initialize later
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.flatten(x, 1)
        if self.fc1 is None:  # lazy init
            self.fc1 = nn.Linear(x.shape[1], 128).to(x.device)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

Using device: cuda:0 NVIDIA GeForce RTX 3060
Dataset size (approx, no targets): 44.86 MB


In [6]:
# Training loop, timed
start_time = time.time()
for epoch in range(10):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader):.4f}")
end_time = time.time()
print(f"Training completed in {end_time - start_time:.2f} seconds")

Epoch 1, Loss: 0.9733
Epoch 2, Loss: 0.3814
Epoch 3, Loss: 0.3211
Epoch 4, Loss: 0.2917
Epoch 5, Loss: 0.2733
Epoch 6, Loss: 0.2597
Epoch 7, Loss: 0.2495
Epoch 8, Loss: 0.2417
Epoch 9, Loss: 0.2351
Epoch 10, Loss: 0.2298
Training completed in 36.88 seconds


In [5]:
# Delete tensors
del model, optimizer # X, y
# Empty any content in GPU
torch.cuda.empty_cache()