In [82]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import math
import torch.optim as optim

In [2]:
# Allocate a 5x3 tensor without initializing its memory
x = torch.empty((5, 3))

In [3]:
# torch.empty does not initialize memory: the values are arbitrary, and the zeros shown below are not guaranteed
print(x)

tensor([[0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000]])


In [4]:
# Draw a 5x3 matrix of uniform random numbers from [0, 1)
x = torch.rand((5, 3))
print(x)

tensor([[0.1704, 0.9741, 0.1023],
        [0.2104, 0.9807, 0.0462],
        [0.7726, 0.8231, 0.7801],
        [0.7125, 0.9836, 0.1381],
        [0.2038, 0.5255, 0.1907]])


In [5]:
# Pass dtype= to choose the element type explicitly (here: 64-bit integers)
x = torch.zeros((5, 3), dtype=torch.int64)
print(x)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])


In [6]:
# Build a tensor straight from Python data
x = torch.tensor([5.5, 2.0])
print(x)

tensor([5.5000, 2.0000])


In [7]:
# new_* methods reuse the properties (e.g. dtype) of the source tensor
# unless new values are passed explicitly
x = x.new_ones((5, 3), dtype=torch.float64)
print(x)
# *_like functions copy the shape of their argument; the dtype is overridden here
x = torch.rand_like(x, dtype=torch.float32)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[0.0309, 0.4967, 0.1347],
        [0.7891, 0.2896, 0.8662],
        [0.3636, 0.6245, 0.4335],
        [0.3678, 0.3438, 0.4075],
        [0.4919, 0.8876, 0.7067]])


In [8]:
# Tensor shape: torch.Size is a tuple subclass, so every tuple operation works on it
print(x.shape)

torch.Size([5, 3])


In [9]:
# Most operations come in several equivalent spellings; here are three ways to add
y = torch.rand((5, 3))
result = torch.empty_like(y)
print(x + y)                 # operator form
print(torch.add(x, y))       # function form
torch.add(x, y, out=result)  # function form writing into a preallocated tensor
print(result)

tensor([[0.3452, 1.1404, 0.7453],
        [1.0496, 0.4620, 1.5062],
        [0.7671, 0.7680, 1.3700],
        [1.0517, 0.4835, 1.2509],
        [0.7248, 1.8035, 0.7756]])
tensor([[0.3452, 1.1404, 0.7453],
        [1.0496, 0.4620, 1.5062],
        [0.7671, 0.7680, 1.3700],
        [1.0517, 0.4835, 1.2509],
        [0.7248, 1.8035, 0.7756]])
tensor([[0.3452, 1.1404, 0.7453],
        [1.0496, 0.4620, 1.5062],
        [0.7671, 0.7680, 1.3700],
        [1.0517, 0.4835, 1.2509],
        [0.7248, 1.8035, 0.7756]])


In [10]:
# Operations can also mutate a tensor in place (methods ending in an underscore, or augmented assignment)
print(y + x)
y += x
print(y)

tensor([[0.3452, 1.1404, 0.7453],
        [1.0496, 0.4620, 1.5062],
        [0.7671, 0.7680, 1.3700],
        [1.0517, 0.4835, 1.2509],
        [0.7248, 1.8035, 0.7756]])
tensor([[0.3452, 1.1404, 0.7453],
        [1.0496, 0.4620, 1.5062],
        [0.7671, 0.7680, 1.3700],
        [1.0517, 0.4835, 1.2509],
        [0.7248, 1.8035, 0.7756]])


In [11]:
# Indexing follows NumPy conventions.
# .view reshapes without copying; -1 lets PyTorch infer that dimension
x = torch.randn((4, 4))
y = x.view(16)
z = x.view(-1, 8)
print(x.shape, y.shape, z.shape)

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


# NumPy and PyTorch bridge

In [12]:
# Start from a torch tensor holding five ones
a = torch.ones((5,))
print(a)

tensor([1., 1., 1., 1., 1.])


In [13]:
# Convert the tensor to a NumPy array; the array shares memory with the tensor
b = a.numpy()

In [14]:
# Show the NumPy array obtained from the tensor
print(b)

[1. 1. 1. 1. 1.]


In [15]:
# The tensor and the array share memory, so an in-place update shows up in both
a += 1
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [16]:
# The bridge works in the other direction too: from_numpy shares memory with the array
a = np.ones(5)
b = torch.from_numpy(a)
a += 1  # in-place NumPy update, visible through the tensor as well
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


# Let's talk a little about CUDA

In [17]:
# Tensors can be created on, or moved to, a GPU; the whole cell is skipped when CUDA is unavailable
if torch.cuda.is_available():
    device = torch.device("cuda")
    # create a tensor directly on the GPU
    y = torch.ones_like(x, device=device)
    # move an existing tensor to the GPU (x is rebound to the GPU copy)
    x = x.to(device)
    z = x + y
    print(z)
    # .to can change the device and the dtype in one call
    print(z.to("cpu", torch.double))

tensor([[ 3.3195,  0.9166,  1.2204, -0.4471],
        [-0.4810,  1.8132,  1.9455,  2.1372],
        [ 2.8060,  0.2998,  1.9245,  1.1016],
        [-1.2029,  2.3898,  2.0093,  0.7150]], device='cuda:0')
tensor([[ 3.3195,  0.9166,  1.2204, -0.4471],
        [-0.4810,  1.8132,  1.9455,  2.1372],
        [ 2.8060,  0.2998,  1.9245,  1.1016],
        [-1.2029,  2.3898,  2.0093,  0.7150]], dtype=torch.float64)


# Let's compute some gradients

In [18]:
# .requires_grad tells autograd whether operations on a tensor should be tracked
# .backward() computes the gradients and accumulates them into .grad
# .grad holds the accumulated gradients
# to stop tracking a tensor's history and computing gradients for it, call .detach()
# the same effect can be obtained for a whole block of code by wrapping it in: with torch.no_grad()
# inside such a block no gradients are tracked, even for tensors with .requires_grad == True
# every tensor produced by an operation references, through .grad_fn, the Function that created it
# if the tensor was created by the user, .grad_fn is None
# .requires_grad_( ... ) changes the requires_grad flag in place

In [19]:
# Track operations on x so that gradients can be computed later
x = torch.ones((2, 2), requires_grad=True)

In [20]:
# y is the result of an operation on a tracked tensor, so it carries a grad_fn
y = torch.add(x, 2)
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward>)


In [21]:
# The Function object that produced y
print(y.grad_fn)

<AddBackward object at 0x000001139504A4E0>


In [22]:
# z = 3 * y^2 element-wise; out reduces z to a single value by averaging its entries
z = y * y * 3
out = z.mean()

print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward>) tensor(27., grad_fn=<MeanBackward1>)


In [24]:
# requires_grad defaults to False for user-created tensors;
# .requires_grad_(...) flips the flag in place
a = torch.rand((2, 2))
a = (3 * a) / (a - 1)
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)
b = torch.sum(a * a)
print(b.grad_fn)

False
True
<SumBackward0 object at 0x00000113950581D0>


In [28]:
# Backpropagate from out (a single value, so no gradient argument is needed); results accumulate into .grad of the leaf tensors
out.backward()

In [30]:
# d(out)/dx: out = mean(3 * (x + 2)^2) over 4 entries, so each entry is 6 * (x + 2) / 4 = 4.5 at x = 1
x.grad

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

In [42]:
# Keep doubling y until its norm reaches 1000; the number of doublings depends on the random x
x = torch.randn(3, requires_grad=True)

y = x * 2
# detach() returns a tensor that autograd does not track, so evaluating the loop
# condition is not recorded in the graph (preferred over the legacy .data attribute)
while y.detach().norm() < 1000:
    y = y * 2

print(y)



tensor([-1139.8070,   637.8785,  1129.1609], grad_fn=<MulBackward>)


In [43]:
# Norm of y, computed on a detached tensor so nothing is recorded by autograd
y.detach().norm()

tensor(1726.5728)

In [44]:
# Display y together with its grad_fn
y

tensor([-1139.8070,   637.8785,  1129.1609], grad_fn=<MulBackward>)

In [45]:
# Cross-check: Euclidean norm computed by hand from the detached elements of y
math.sqrt(sum(v ** 2 for v in y.detach()))

1726.5727612817248

In [46]:
# y is not a scalar, so backward needs a vector with which to form the vector-Jacobian product
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradient=gradients)
print(x.grad)

tensor([ 102.4000, 1024.0000,    0.1024])


In [47]:
# History tracking can be switched off for a whole block instead of detaching every tensor explicitly
print(x.requires_grad)
print(x.pow(2).requires_grad)

with torch.no_grad():
    print(x.pow(2).requires_grad)


True
True
False


In [49]:
# Two things to keep in mind:
# first: requires_grad=True needs a floating-point (or complex) dtype such as torch.float
# second: backward(gradient) can be used to extract the gradient of one particular element

x = torch.tensor([[1, 2, 3, 4]], dtype=torch.float, requires_grad=True)
z = 2 * x
loss = z.sum(dim=1)

# do backward for the first element of z
# retain_graph=True keeps the graph usable for the backward calls that follow
z.backward(torch.tensor([[1.0, 0.0, 0.0, 0.0]]), retain_graph=True)
print(x.grad)
x.grad.zero_()  # clear x.grad, otherwise the next backward accumulates into it

# do backward for the second element of z
z.backward(torch.tensor([[0.0, 1.0, 0.0, 0.0]]), retain_graph=True)
print(x.grad)
x.grad.zero_()

# do backward for all elements of z, with weights equal to the derivative of
# loss w.r.t. z_1, z_2, z_3 and z_4
z.backward(torch.tensor([[1.0, 1.0, 1.0, 1.0]]), retain_graph=True)
print(x.grad)
x.grad.zero_()

# or we can backpropagate directly from loss
loss.backward()  # equivalent to loss.backward(torch.tensor([1.0]))
print(x.grad)

tensor([[2., 0., 0., 0.]])
tensor([[0., 2., 0., 0.]])
tensor([[2., 2., 2., 2.]])
tensor([[2., 2., 2., 2.]])


# Let's build some neural networks now :D

In [62]:
class Net(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages, then three linear layers.

    fc1 expects 16 * 5 * 5 input features, which is what the two 5x5
    convolutions and 2x2 poolings produce for a 1x32x32 input
    (32 -> 28 -> 14 -> 10 -> 5 per spatial side).
    """

    def __init__(self):
        super().__init__()

        # 1 input channel -> 6 feature maps -> 16 feature maps, 5x5 kernels
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # fully connected head ending in 10 outputs
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run a mini-batch through the network.

        Args:
            x: batch of inputs with a leading batch dimension and one channel;
               the spatial size must leave 16 * 5 * 5 features after the
               conv/pool stages (e.g. 32x32).

        Returns:
            Tensor with one row of 10 raw (un-normalized) outputs per sample.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # a single int is shorthand for a square pooling window
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # collapse everything except the batch dimension
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all dims but the first.

        Kept for callers that still use it; forward() relies on torch.flatten instead.
        """
        num_feats = 1
        for s in x.size()[1:]:
            num_feats *= s
        return num_feats
    

In [63]:
# Instantiate the network defined above
net = Net()

In [64]:
# Printing a Module lists the layers registered on it
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [65]:
# net.parameters() yields every learnable tensor: a weight and a bias for each of the 5 layers
params = list(net.parameters())
print(len(params))
print(type(params[0]))
print(params[0].shape)  # the weight of conv1

10
<class 'torch.nn.parameter.Parameter'>
torch.Size([6, 1, 5, 5])


In [66]:
# torch.nn works on mini-batches only: inputs need a leading batch dimension.
# A single sample can be given one with .unsqueeze(0).
# NOTE: the name `input` shadows the Python builtin; it is kept because later cells use it.
input = torch.randn((1, 1, 32, 32))
out = net(input)
print(out)

tensor([[ 0.0522,  0.0072, -0.1371, -0.0583, -0.0566, -0.0437, -0.0325,  0.0201,
          0.0589, -0.0485]], grad_fn=<ThAddmmBackward>)


In [67]:
# Clear the gradient buffers, then backpropagate a random upstream gradient through the net
net.zero_grad()
out.backward(gradient=torch.randn((1, 10)))

In [68]:
# Compute a loss: mean squared error between the prediction and a dummy target
output = net(input)
target = torch.randn(10).unsqueeze(0)  # add a batch dimension so the shape matches output
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(1.4618, grad_fn=<MseLossBackward>)


In [74]:
# The Function that produced the loss (the MSE loss itself)
print(loss.grad_fn)

<MseLossBackward object at 0x00000113951CEC50>


In [77]:
# Walk one step back in the graph: the Function feeding the first input of the loss
print(loss.grad_fn.next_functions[0][0])

<ThAddmmBackward object at 0x00000113951CE9B0>


In [78]:
# ...and one step further back along the same path
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<ExpandBackward object at 0x00000113951CEF60>


In [79]:
# Gradients accumulate, so the gradient buffers are cleared before backpropagating the loss

print("conv1.bias.grad before back prop and zero", net.conv1.bias.grad, sep="\n")

net.zero_grad()

print("conv1.bias.grad before back prop and after zero", net.conv1.bias.grad, sep="\n")

loss.backward()

print("conv1.bias.grad after back prop and after zero", net.conv1.bias.grad, sep="\n")



conv1.bias.grad before back prop and zero
tensor([-0.0178, -0.0848,  0.0223, -0.1098,  0.1170, -0.0649])
conv1.bias.grad before back prop and after zero
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after back prop and after zero
tensor([ 0.0134,  0.0032,  0.0080,  0.0027, -0.0097, -0.0126])


In [81]:
# Just as you can define your own loss, you can also apply the update rule yourself.
# Plain SGD: weight = weight - learning_rate * gradient
learning_rate = 1e-2

# no_grad keeps the in-place update out of the autograd graph
# (this replaces the legacy .data access)
with torch.no_grad():
    for f in net.parameters():
        if f.grad is None:  # this parameter received no gradient; nothing to apply
            continue
        f.sub_(learning_rate * f.grad)
    
    

In [85]:
# The same update done with a built-in optimizer: one complete training step
optimizer = optim.SGD(net.parameters(), lr=1e-2)
# clear gradients left over from earlier backward calls
optimizer.zero_grad()
# forward pass and loss
output = net(input)
loss = criterion(output, target)
# backward pass fills .grad on every parameter
loss.backward()
# apply the SGD update using the gradients just computed
optimizer.step()