# GPUs

In [1]:
# Install the MXNet wheel built against CUDA 10.0 (the "cu100" suffix).
# NOTE(review): no version is pinned, so a re-run may install a newer release
# than the one that produced the outputs recorded below.
!pip install mxnet-cu100



In [2]:
# Show the GPUs, driver version and memory usage visible to this runtime.
!nvidia-smi

Sat Sep 21 10:50:02 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 430.40       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   63C    P8    11W /  70W |      0MiB / 15079MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

## Computing Devices


In [3]:
import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn

# Build context (device) handles. mx.gpu() without an index refers to the same
# device as mx.gpu(0), as the displayed tuple shows.
mx.cpu(), mx.gpu(), mx.gpu(0)

(cpu(0), gpu(0), gpu(0))

## NDArray and GPUs


In [4]:
# No ctx argument is given, and .context reports cpu(0) for the new array.
x = nd.array([1, 2, 3])
x.context,x

(cpu(0), 
 [1. 2. 3.]
 <NDArray 3 @cpu(0)>)

### Storage on the GPU


In [5]:
# Passing ctx allocates the array directly on that device.
# x is rebound here to a 2x3 array of ones on the GPU; later cells reuse it.
x = nd.ones((2, 3), ctx=mx.gpu())
x


[[1. 1. 1.]
 [1. 1. 1.]]
<NDArray 2x3 @gpu(0)>

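Create on a specific GPU by passing its index. This runtime has only one GPU, so we use `mx.gpu(0)`; on a multi-GPU machine, `mx.gpu(1)` would select the second GPU: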

In [6]:
# Random 2x3 array created on mx.gpu(0), i.e. device index 0 (the first GPU).
# No seed is set in the visible cells, so the values change from run to run.
y = nd.random.uniform(shape=(2, 3), ctx=mx.gpu(0))
y


[[0.6686509  0.17409194 0.3850025 ]
 [0.24678314 0.35134333 0.8404298 ]]
<NDArray 2x3 @gpu(0)>

### Copy with `copyto`

Inputs for an operator should be on the same device. 

![Copyto copies arrays to the target device](http://d2l.ai/_images/copyto.svg)

In [7]:
# copyto returns a copy of x placed on the given context. y and z are then on
# the same device (gpu(0)), so they can be added together.
z = x.copyto(mx.gpu(0))
y + z


[[1.6686509 1.1740919 1.3850025]
 [1.2467831 1.3513434 1.8404298]]
<NDArray 2x3 @gpu(0)>

### Copy with `as_in_context`

In [8]:
# as_in_context gives an array on the requested context.
# x already lives on gpu(0), so here z is the same object as x rather than a
# copy (the identity check on y in the next section shows this behavior).
z = x.as_in_context(mx.gpu(0))
z


[[1. 1. 1.]
 [1. 1. 1.]]
<NDArray 2x3 @gpu(0)>

### Tiny Difference between `copyto` and  `as_in_context` 

In [9]:
# as_in_context returns the input array itself (no copy) when it is already on
# the target device, so the identity check evaluates to True.
y.as_in_context(mx.gpu(0)) is y

True

In [10]:
# copyto always allocates new memory and copies the input into it, even when
# the target is the device the array is already on, so the check is False.
y.copyto(mx.gpu()) is y

False

## Gluon and GPUs

In [11]:
# A network with a single Dense layer (one output unit); its parameters are
# initialized on the GPU via ctx.
net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize(ctx=mx.gpu())

# When the input is an NDArray on the GPU,
# Gluon will calculate the result on the same GPU.
# print() is used because only the cell's last expression is shown automatically.
print(net(x))
# The layer's weight is stored on the GPU as well (note @gpu(0) in its repr).
net[0].weight.data()


[[0.04995865]
 [0.04995865]]
<NDArray 2x1 @gpu(0)>



[[0.0068339  0.01299825 0.0301265 ]]
<NDArray 1x3 @gpu(0)>