# How to make use of GPU resources with PyTorch.


In [None]:
!nvidia-smi

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset
import torch.optim as optim

We can check to see if we have a GPU like so:

In [2]:
torch.cuda.is_available()  # do we have a GPU? Should return True.

False

In [3]:
torch.cuda.device_count()  # how many GPUs do we have access to?

0

If you are seeing 0 GPU's available, go back and ensure you have started the notebook server with the correct settings. If you are having trouble restarting the server, try File -> Hub Control Panel, like so:

![image.png](docs/assets/hub_controlpanel.png)

## Now we know if we have a GPU device to use - let's use it!

In [4]:
# We'll assign the first GPU Device to our device variable.

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)  # let's see what device we got

cpu


If our print output is not 'cuda:0' - our environment is not set up correctly. Ensure you have loaded a GPU enabled environment.

In [5]:
# let's see the name of our GPU!
torch.cuda.get_device_name(0)

RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

### How to load vectors / matrices / data onto the GPU.

In [6]:
X_train = torch.IntTensor([0, 30, 50, 75, 70])  # Initializing a Tensor of Ints
print(X_train.is_cuda, ",", X_train.device)  # Is tensor on GPU? Which device?

False , cpu


In [7]:
# Move it to the device we want.
X_train.to(device)
# Now let's see!
print(X_train.is_cuda, ",", X_train.device)

False , cpu


In [8]:
# Alternatively, we can initialize the tensor directly on Device.
X_test = torch.IntTensor([30, 40, 50], device=device)
print(X_test.is_cuda, ",", X_test.device)

False , cpu


### How to load a Neural Network Model onto the GPU.

In [17]:
# Here is a basic fully connected neural network built in Torch.
# If we want to load it / train it on our GPU, we must first put it on the GPU
# Otherwise it will remain on CPU by default.

batch_size = 1


class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(4, 4)
        self.fc2 = nn.Linear(4, 2)

    def forward(self, x):
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        output = F.softmax(x, dim=1)
        return output

In [18]:
model = SimpleNet().to(device)  # Now, the network is loaded onto our GPU!

We've got our model loaded onto the GPU, let's now try training it on some dummy data that we create.

Our network takes in an input of 4 values per datapoint, and then classifies it, so we need to create a dataset with 4 points of data that is either class 0 or 1 per datapoint.

## NOTE - BELOW CODE IS _NOT CONVERGING_ - I AM LIKELY DOING SOMETHING SILLY : ) 
perhaps its impossible to make a fc nn converge with this random data despite them being centered around diff values. OR i am using old training functions that wouldn't work in this situation
## WILL FIX BY EOD OR DROP THIS SECTION AS ITS NOT REALLY NECESSARY FOR THE POINT OF TUTORIAL.

In [19]:
# create labels tensor, 0s and 1s
data_y = torch.cat((torch.zeros(500), torch.ones(500)), 0)
data_y = data_y.type(torch.LongTensor)

In [20]:
# create random datapoints centered around different values
data_x = torch.cat((5*torch.rand(500, 4), torch.rand(500, 4)), 0)

In [21]:
# Just for a sanity check, let's try running one data point through our model!
model(data_x[0])

tensor([[0.5029, 0.4971]], grad_fn=<SoftmaxBackward>)

In [22]:
# create a dataset to use!
train_dataset = TensorDataset(data_x, data_y)

In [23]:
# sticking that in a data loader.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=False, num_workers=2)

In [24]:
for x in train_loader:
    print(x)

[tensor([[2.9385, 4.7192, 2.3236, 3.8374]]), tensor([0])]
[tensor([[0.8964, 0.4152, 1.4175, 2.5355]]), tensor([0])]
[tensor([[1.3221, 4.2727, 2.5126, 4.7421]]), tensor([0])]
[tensor([[4.6897, 3.4144, 4.8434, 4.8856]]), tensor([0])]
[tensor([[3.0267, 4.6189, 2.4232, 0.6789]]), tensor([0])]
[tensor([[4.7161, 2.6775, 0.1382, 4.2403]]), tensor([0])]
[tensor([[0.3885, 3.6258, 0.2975, 4.6756]]), tensor([0])]
[tensor([[4.8860, 0.4553, 3.8667, 0.4541]]), tensor([0])]
[tensor([[3.8647, 0.0653, 3.8008, 0.4962]]), tensor([0])]
[tensor([[1.3218, 2.2027, 2.5196, 4.7081]]), tensor([0])]
[tensor([[1.1667, 0.8494, 2.2732, 1.6561]]), tensor([0])]
[tensor([[0.5430, 0.9165, 0.0738, 1.4555]]), tensor([0])]
[tensor([[2.1462, 4.1395, 4.6849, 2.4536]]), tensor([0])]
[tensor([[4.7564, 3.3235, 1.2955, 3.8871]]), tensor([0])]
[tensor([[0.6294, 1.3490, 2.6562, 3.1714]]), tensor([0])]
[tensor([[4.0427, 3.0014, 4.5682, 3.6096]]), tensor([0])]
[tensor([[0.7650, 1.8521, 1.4153, 1.6889]]), tensor([0])]
[tensor([[0.58

Okay, our dataset *should* be all set, let's try training our model and see how well it classifies between our two classes! In practice we would also evaluate the model on a test dataset, but for the purposes of this exercise we will only evaluate it on the training set.

In [25]:
from tqdm import tqdm

def train( model, device, train_loader, optimizer, epoch ):
    """Model training function"""
    model.train()
    print(device)
    for batch_idx, (data, target) in tqdm(enumerate(train_loader)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

In [26]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [29]:
EPOCHS = 5
# our optimization strategy used in training.
optimizer = optim.Adadelta(model.parameters(), lr=0.01)

In [30]:
for epoch in range(1, EPOCHS + 1):
        print( f"EPOCH: {epoch}")
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, train_loader)

EPOCH: 1
cpu


1000it [00:01, 651.55it/s]



Test set: Average loss: -0.4957, Accuracy: 481/1000 (48%)

EPOCH: 2
cpu


1000it [00:01, 607.32it/s]



Test set: Average loss: -0.4966, Accuracy: 546/1000 (55%)

EPOCH: 3
cpu


1000it [00:01, 659.60it/s]



Test set: Average loss: -0.4972, Accuracy: 545/1000 (54%)

EPOCH: 4
cpu


1000it [00:01, 619.23it/s]



Test set: Average loss: -0.4976, Accuracy: 529/1000 (53%)

EPOCH: 5
cpu


1000it [00:01, 641.70it/s]



Test set: Average loss: -0.4980, Accuracy: 517/1000 (52%)



In [82]:
model.eval()

SimpleNet(
  (fc1): Linear(in_features=16, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=2, bias=True)
)