## Convolutional Neural Networks (CNNs)

In [2]:
# Convolutional layers    N images -> C images, filter window size KxK
# Maxpooling operations   Reduce feature map dimensions by picking max value within local, non-overlapping slides size LxL
# Fully connected layers  N output units connected to each of the M input units
# PyTorch : Python package for replacing NumPy with GPUs or deep learning

In [60]:
import numpy as np
import torch

## Tensors

In [61]:
# Values randomly initialized between 0-1
x = torch.Tensor(5, 3)
print(x)
print(x.type)
# The type is Tensor object

tensor([[1.3325e-05, 4.5789e-41, 1.1712e-29],
        [3.0677e-41,        nan, 0.0000e+00],
        [4.4653e+30, 1.3428e+13, 3.9905e+24],
        [4.1996e+12, 9.3157e-39, 1.4976e+10],
        [8.9683e-44, 0.0000e+00, 8.9683e-44]])
<built-in method type of Tensor object at 0x7fa41a7cbb88>


In [62]:
# Random numbers within range 0-1
y = torch.rand(5, 3)
print(y)
print(y.type)
# The type is Tensor object

# Random numbers within range 0-1 with mean 0 std 1
y = torch.randn(5, 3)
print(y)

tensor([[0.1168, 0.8963, 0.0062],
        [0.4636, 0.8260, 0.3008],
        [0.4518, 0.2054, 0.3064],
        [0.8352, 0.1216, 0.6943],
        [0.3327, 0.6699, 0.2132]])
<built-in method type of Tensor object at 0x7fa41a7d2f78>
tensor([[-0.2964, -0.6923,  0.0345],
        [-0.4893, -1.3670,  0.8075],
        [ 0.9762, -3.0063, -0.6665],
        [-0.4943,  2.4127, -1.2222],
        [ 0.5770, -0.6049, -0.2870]])


In [63]:
x = x.double()
y = y.double()
print(x)
print(y)
# The type is torch.float64

tensor([[1.3325e-05, 4.5789e-41, 1.1712e-29],
        [3.0677e-41,        nan, 0.0000e+00],
        [4.4653e+30, 1.3428e+13, 3.9905e+24],
        [4.1996e+12, 9.3157e-39, 1.4976e+10],
        [8.9683e-44, 0.0000e+00, 8.9683e-44]], dtype=torch.float64)
tensor([[-0.2964, -0.6923,  0.0345],
        [-0.4893, -1.3670,  0.8075],
        [ 0.9762, -3.0063, -0.6665],
        [-0.4943,  2.4127, -1.2222],
        [ 0.5770, -0.6049, -0.2870]], dtype=torch.float64)


In [64]:
x = torch.Tensor([[-0.1859,  1.3970,  0.5236],
                  [ 2.3854,  0.0707,  2.1970],
                  [-0.3587,  1.2359,  1.8951],
                  [-0.1189, -0.1376,  0.4647],
                  [-1.8968,  2.0164,  0.1092]])
y = torch.Tensor([[ 0.4838,  0.5822,  0.2755],
                  [ 1.0982,  0.4932, -0.6680],
                  [ 0.7915,  0.6580, -0.5819],
                  [ 0.3825, -1.1822,  1.5217],
                  [ 0.6042, -0.2280,  1.3210]])
print(x.shape)
print(y.shape)

torch.Size([5, 3])
torch.Size([5, 3])


In [65]:
z = torch.stack((x, y))   # stack entire matrices
print(z.shape)
z1 = torch.cat((x, y), 0) # stack alone columns
print(z1.shape)
z2 = torch.cat((x, y), 1) # stack along rows
print(z2.shape)

torch.Size([2, 5, 3])
torch.Size([10, 3])
torch.Size([5, 6])


In [66]:
print(y[4,2])
print(z[1,4,2])

tensor(1.3210)
tensor(1.3210)


In [67]:
print(z[:,4,2]) # There are two elements

tensor([0.1092, 1.3210])


In [68]:
# 3 ways to sum x and y
print(x + y)
print(torch.add(x, y))
print(x.add(y))
torch.add(x, y, out=x)
print(x)

tensor([[ 0.2979,  1.9792,  0.7991],
        [ 3.4836,  0.5639,  1.5290],
        [ 0.4328,  1.8939,  1.3132],
        [ 0.2636, -1.3198,  1.9864],
        [-1.2926,  1.7884,  1.4302]])
tensor([[ 0.2979,  1.9792,  0.7991],
        [ 3.4836,  0.5639,  1.5290],
        [ 0.4328,  1.8939,  1.3132],
        [ 0.2636, -1.3198,  1.9864],
        [-1.2926,  1.7884,  1.4302]])
tensor([[ 0.2979,  1.9792,  0.7991],
        [ 3.4836,  0.5639,  1.5290],
        [ 0.4328,  1.8939,  1.3132],
        [ 0.2636, -1.3198,  1.9864],
        [-1.2926,  1.7884,  1.4302]])
tensor([[ 0.2979,  1.9792,  0.7991],
        [ 3.4836,  0.5639,  1.5290],
        [ 0.4328,  1.8939,  1.3132],
        [ 0.2636, -1.3198,  1.9864],
        [-1.2926,  1.7884,  1.4302]])


In [69]:
# Reshape a tensor
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8) # -1 is unsure of number of rows
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [71]:
x = torch.randn(10, 10)
y = torch.randn(2, 100)
x = x.view(1, 100)
y = y.view(100, 2)
z = torch.mm(x, y)
print(z)
print(z.shape)

tensor([[13.7006,  1.5672]])
torch.Size([1, 2])


## NumPy and PyTorch

In [79]:
a = torch.ones(5) # type tensor([])
print(a)
print(a.type)
b = a.numpy()     # type []
print(b)
print(b.dtype)

tensor([1., 1., 1., 1., 1.])
<built-in method type of Tensor object at 0x7fa41a7c7ee8>
[1. 1. 1. 1. 1.]
float32


In [80]:
# Tensor and numpy array share same memory location
a[0] += 1
print(a)
print(b)

tensor([2., 1., 1., 1., 1.])
[2. 1. 1. 1. 1.]


In [87]:
# 3 ways to add 1 to first index
a.add_(1)
print(a)
a[:] += 1
print(a)
a = a.add(1)
print(a)

tensor([13., 12., 12., 12., 12.])
tensor([14., 13., 13., 13., 13.])
tensor([15., 14., 14., 14., 14.])


In [88]:
# Convert NumPy array to tensor
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [90]:
# Move tensor onto GPU device's memory .to('cuda') or .cuda()
# Move tensor back to CPU device .to('cpu') or .cpu()
# or allocate tensor directly to GPU using device optional argument
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
x = torch.randn(5, 3).to(device)
y = torch.randn(5, 3, device=device)
z = x + y

cuda


In [92]:
print(z.cpu().numpy())
print(z.numpy)

[[ 1.4859016  -1.1716673   2.5800905 ]
 [ 3.7868176  -0.42896038 -2.0138376 ]
 [ 0.03171322 -1.2702194   1.1888984 ]
 [-1.9605424  -2.220412    0.23264462]
 [-0.73804116 -1.806461   -0.07394073]]
<built-in method numpy of Tensor object at 0x7fa41a7df948>


## Autograd: Automatic Differentiation

In [108]:
# requires_grad is set to False by default setting it True tracks all operations where the tensor is 
# involved when computation complete, call .backward() to have all gradients computed automatically for 
# all these tensors. Gradients for these tensors accumulated into .grad attribute
x = torch.ones(2, 2, requires_grad=True)
print(x)
y = x + 2
print(y) 
print(x.grad)
print(y.grad)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
None
None


In [114]:
z = y * y * 3 
# = 3*(x+2)^2 
# = (3*(x1+2)^2 + 3*(x2+2)^2 + 3*(x3+2)^2 + 3*(x4+2)^2) / 4
# df/dx1 = 3*2(x1+2)/4 = ((6*x1)+12) / 4
# x1 = 1
# df/dx1 = (6+12)/4 = 18/4 = 4.5
f = z.mean()
print(z,f)

b = x1
a = x1*x2*b
f = a+b

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


In [115]:
f.backward()

In [116]:
print(f.grad_fn)
print(f)

<MeanBackward0 object at 0x7fa35c374b70>
tensor(27., grad_fn=<MeanBackward0>)


## MNIST Data Preparation

In [137]:
import MNISTtools

# Images stacked in columns
# Vector of corresponding labels from 0 to 9
xtrain, ltrain = MNISTtools.load("training", path="./mnist/")
xtest, ltest = MNISTtools.load("testing", path="./mnist/")

In [139]:
def normalize_MNIST_images(x):
    x = ((x-255.0/2)/(255.0/2))
    x = x.astype(np.float32)
    return x

print(np.min(xtrain))
print(np.max(xtrain))
xtrain = normalize_MNIST_images(xtrain)
xtest = normalize_MNIST_images(xtest)

-1.0
1.0


In [146]:
print(xtrain.shape)
print(xtest.shape)
xtrain = xtrain.reshape(28, 28, 1, 60000)
xtest = xtest.reshape(28, 28, 1, 10000)
print(xtrain.shape)
print(xtest.shape)
np.moveaxis(xtrain, 0, -1, -2).shape
print(xtrain.shape)

(28, 28, 1, 60000)
(28, 28, 1, 10000)
(28, 28, 1, 60000)
(28, 28, 1, 10000)


TypeError: moveaxis() takes 3 positional arguments but 4 were given

In [125]:
MNISTtools.show(xtrain[42, 0, :, :])

IndexError: too many indices for array