# GPU utilization
The following cell shows how heavily each GPU is currently utilized.
I would choose the GPU which has the most free memory, e.g.
`11MiB / 11439MiB` tells you that only 11 MiB are in use.

In [1]:
# Show every nvidia-smi line containing "N/A" (the per-GPU status rows) with two lines of context on each side.
! nvidia-smi | grep -C 2 "N/A"

|   0  Tesla K80           Off  | 00000000:04:00.0 Off |                    0 |
| N/A   51C    P0    89W / 149W |    652MiB / 11439MiB |     92%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla K80           Off  | 00000000:05:00.0 Off |                    0 |
| N/A   40C    P0    97W / 149W |    943MiB / 11439MiB |     87%      Default |
+-------------------------------+----------------------+----------------------+
|   2  Tesla K80           Off  | 00000000:83:00.0 Off |                    0 |
| N/A   51C    P0    61W / 149W |  11374MiB / 11439MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   3  Tesla K80           Off  | 00000000:84:00.0 Off |                    0 |
| N/A   49C    P0   150W / 149W |   6381MiB / 11439MiB |     96%      Default |
+-------------------------------+----------------------+----------------------+
                                        

In [2]:
import os

# Both variables are set before torch is imported so that they are already in
# place when CUDA gets initialised.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # enumerate GPUs in PCI bus order
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # which GPU are we using (from 0 to 3)

import torch

torch.set_num_threads(2)

# Only one GPU is made visible above, so inside this process it is "cuda:0".
# Fall back to the CPU when no CUDA device is usable, so that later
# `.to(device)` calls do not fail on a machine without a GPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [3]:
# Create an n x n matrix of zeros directly on the selected device, instead of
# building it on the host first and copying it over with `.to(device)`.
n = 1000
a = torch.zeros(n, n, device=device)
 

In [4]:
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
# NOTE(review): Variable is a deprecated wrapper on PyTorch >= 0.4, where plain tensors track gradients themselves.
from torch.autograd import Variable
# Bare last expression: the cell displays whether PyTorch can use a CUDA device.
torch.cuda.is_available()

True

In [5]:
# MNIST training and test splits, stored under ./data, with ToTensor() applied
# as the transform. Only the training split requests a download.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

In [6]:
# Hyper-parameters shared by the cells below.
batch_size = 100
epochs = 10

# Training batches are reshuffled on every pass; test batches keep dataset order.
train_load = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    num_workers=2,
    shuffle=True,
)
test_load = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    num_workers=2,
    shuffle=False,
)

In [9]:
# Report how many samples each dataset holds and how many batches each loader yields.
for template, size in [
    ("Number of images in training set: {}", len(train_dataset)),
    ("Number of images in test set: {}", len(test_dataset)),
    ("Number of batches in the train loader: {}", len(train_load)),
    ("Number of batches in the test loader: {}", len(test_load)),
]:
    print(template.format(size))

Number of images in training set: 60000
Number of images in test set: 10000
Number of batches in the train loader: 600
Number of batches in the test loader: 100


In [10]:
from classes.models.CNN import CNN
from classes.optim import SGD

# Seed torch's RNG before the network is built -- presumably so that CNN()'s
# initial weights are repeatable (CNN is defined outside this notebook).
torch.manual_seed(47)

# CUDA is reused by the training cells to decide where batches are placed.
CUDA = torch.cuda.is_available()
model = CNN().cuda() if CUDA else CNN()

loss_function = nn.CrossEntropyLoss()
optimizer = SGD.SGD(model.parameters(), lr=1e-3)


# Print every parameter tensor of the model. print() is called as a function
# so that the cell parses on Python 3 as well as on Python 2.
for w in model.parameters():
    print(w)


Parameter containing:
tensor([[[[-0.2980, -0.3001, -0.0216],
          [ 0.2505,  0.0374,  0.1989],
          [ 0.3172, -0.1678, -0.2354]]],


        [[[-0.0437,  0.1325,  0.2589],
          [-0.1574, -0.1561, -0.2417],
          [-0.0260,  0.1626, -0.3100]]],


        [[[-0.2385, -0.0629,  0.0227],
          [ 0.0575, -0.2354, -0.1360],
          [-0.1508,  0.1574,  0.2745]]],


        [[[-0.1811, -0.2940, -0.1910],
          [-0.0098,  0.1716, -0.0090],
          [-0.2765,  0.2010,  0.1643]]],


        [[[-0.0962, -0.0630, -0.3145],
          [-0.0620, -0.1136, -0.1233],
          [ 0.0276, -0.1000, -0.1335]]],


        [[[-0.0280, -0.1061, -0.0684],
          [-0.1077, -0.0805, -0.2235],
          [-0.3329,  0.0497, -0.1480]]],


        [[[-0.0531,  0.1734, -0.1789],
          [-0.1540, -0.0209,  0.2476],
          [-0.0416,  0.2050, -0.2495]]],


        [[[-0.1266,  0.1427,  0.2202],
          [-0.1647,  0.1948, -0.3059],
          [-0.0061, -0.1883,  0.2310]]]], device='cud

In [9]:
def _test_accuracy(net, loader, use_cuda):
    """Return the integer percentage of samples in `loader` that `net` classifies correctly.

    The pass runs with gradient tracking disabled and with the network switched
    to evaluation mode; the mode the network was in before the call is restored
    afterwards. Assumes `net` is a torch.nn.Module whose output has one score
    per class along dimension 1. Returns 0 if the loader yields no samples.
    """
    was_training = net.training
    net.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for test_images, test_labels in loader:
                if use_cuda:
                    test_images = test_images.cuda()
                    test_labels = test_labels.cuda()
                # The index of the largest score along dim 1 is the predicted class.
                _, predicted = torch.max(net(test_images), 1)
                total += test_labels.size(0)
                # .item() keeps the running counts as plain Python ints.
                correct += (predicted == test_labels).sum().item()
    finally:
        net.train(was_training)
    if total == 0:
        return 0
    # Explicit floor division: the percentage is an integer on every Python/PyTorch version.
    return 100 * correct // total


iteration = 0
for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_load):
        iteration += 1
        if CUDA:
            images = images.cuda()
            labels = labels.cuda()

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        # Every 100 batches report the loss of the latest training batch and
        # the accuracy on the whole test set.
        if (i + 1) % 100 == 0:
            accuracy = _test_accuracy(model, test_load, CUDA)
            print("Iteration: {}, Train Loss: {}, Test Accuracy:{}%".format(iteration, loss.item(), accuracy))

print("Finished!")

Iteration: 100, Train Loss: 2.21505117416, Test Accuracy:23%
Iteration: 200, Train Loss: 2.1241941452, Test Accuracy:40%
Iteration: 300, Train Loss: 1.97664320469, Test Accuracy:51%
Iteration: 400, Train Loss: 1.88949000835, Test Accuracy:59%
Iteration: 500, Train Loss: 1.75790035725, Test Accuracy:63%
Iteration: 600, Train Loss: 1.60443282127, Test Accuracy:66%
Iteration: 700, Train Loss: 1.44357860088, Test Accuracy:69%
Iteration: 800, Train Loss: 1.34106326103, Test Accuracy:72%
Iteration: 900, Train Loss: 1.26338994503, Test Accuracy:74%
Iteration: 1000, Train Loss: 1.17310547829, Test Accuracy:77%
Iteration: 1100, Train Loss: 1.11943435669, Test Accuracy:78%
Iteration: 1200, Train Loss: 0.966437578201, Test Accuracy:79%
Iteration: 1300, Train Loss: 0.949402868748, Test Accuracy:80%
Iteration: 1400, Train Loss: 1.00470256805, Test Accuracy:82%
Iteration: 1500, Train Loss: 0.802988350391, Test Accuracy:82%
Iteration: 1600, Train Loss: 0.759935617447, Test Accuracy:83%
Iteration: 170

The following cell kills the kernel, which frees this notebook's memory on the GPU for others to use.

In [11]:
# End the session with exit status 0; per the note above, this is meant to kill the kernel so its GPU memory is released.
exit(0)

In [None]:
iteration = 0
trainloss_list = []  # whole-dataset training losses, in the order they were measured

# Number of batches between two loss measurements (about five per epoch).
# Floor division keeps it an integer on Python 3, and max() avoids a modulo by
# zero when the loader has fewer than five batches.
eval_every = max(1, len(train_load) // 5)

for epoch in range(epochs):
    for i, (images, labels) in enumerate(train_load):
        iteration += 1
        if CUDA:
            images = images.cuda()
            labels = labels.cuda()

        if i % eval_every == 0:
            # NOTE(review): computeErrorForWholeDataset is not defined in the
            # visible part of this notebook; it must be defined or imported
            # before this cell can run.
            train_loss = computeErrorForWholeDataset(train_load)
            # TODO: also track the test loss, e.g. computeErrorForWholeDataset(test_load).
            trainloss_list.append(train_loss)
            print(train_loss)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

print("Finished!")