In [2]:
%config IPCompleter.greedy=True

In [3]:
import torch

<span style='color:cyan'>To check my laptop graphic card can use cuda cores.</span>

In [4]:
torch.cuda.is_available()

True

In [5]:
#2 by 3 matrices
x = torch.empty(2,3,dtype=torch.float16)
y = torch.zeros(2,3)
print(x)
print(y)
print(f'the size of the tensor "x" is {x.size()}')

tensor([[ 1.8477e-06,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00, -3.4766e-01,  4.1779e-02]], dtype=torch.float16)
tensor([[0., 0., 0.],
        [0., 0., 0.]])
the size of the tensor "x" is torch.Size([2, 3])


In [6]:
x = torch.Tensor([2.5,0.1])
print(x)

tensor([2.5000, 0.1000])


## <span style='color:rgb(127, 255, 212)'>  OPEARATIONS </span>

In [7]:
x = torch.rand(2,2)
y = torch.rand(2,2)
print(f'x is \n {x}')
print(f'y is \n {y}')

x is 
 tensor([[0.7444, 0.4086],
        [0.0065, 0.7838]])
y is 
 tensor([[0.7209, 0.5976],
        [0.1657, 0.3635]])


In [8]:
print(x + y)

tensor([[1.4653, 1.0062],
        [0.1722, 1.1472]])


### <span style='color:cyan'> EVERY FUNCTION THAT HAS A TRAILING UNDERSCORE WILL DO IN PLACE OPERATION.</span> 
<span style='color:cyan'>Below example, the original 'y' is replaced by the value of x + y</span>

In [9]:
y.add_(x)
print(y)

tensor([[1.4653, 1.0062],
        [0.1722, 1.1472]])


In [10]:
x = torch.rand(5,3)
print(x)
print(f"all rows from column 1 {x[:,0]}")

tensor([[0.3118, 0.6530, 0.1062],
        [0.6821, 0.4422, 0.5395],
        [0.0651, 0.0719, 0.2088],
        [0.3862, 0.6078, 0.1928],
        [0.5752, 0.1294, 0.0802]])
all rows from column 1 tensor([0.3118, 0.6821, 0.0651, 0.3862, 0.5752])


In [11]:
print(f'2nd row from all column {x[1,:]}')

2nd row from all column tensor([0.6821, 0.4422, 0.5395])


<span style='color:cyan'>item() method can only be used if you want single element</span>

In [12]:
print(f'only element at 2X2 {x[1,1].item()}')

only element at 2X2 0.4422146677970886


In [13]:
x = torch.rand(4,4)
print(x)

tensor([[0.1212, 0.9029, 0.0104, 0.8394],
        [0.5088, 0.7336, 0.4365, 0.7740],
        [0.3133, 0.0312, 0.7634, 0.7447],
        [0.8001, 0.6886, 0.9551, 0.3031]])


In [14]:
y = x.view(16)
print(y)
print(f'x reshape into size of {y.size()}')

tensor([0.1212, 0.9029, 0.0104, 0.8394, 0.5088, 0.7336, 0.4365, 0.7740, 0.3133,
        0.0312, 0.7634, 0.7447, 0.8001, 0.6886, 0.9551, 0.3031])
x reshape into size of torch.Size([16])


<span style='color:cyan'>pytorch will automatically give you the size when reshaping</span>

In [15]:
y = x.view(-1,8) # or (2,8)
print(y)
print(f'size is now {y.size()}')

tensor([[0.1212, 0.9029, 0.0104, 0.8394, 0.5088, 0.7336, 0.4365, 0.7740],
        [0.3133, 0.0312, 0.7634, 0.7447, 0.8001, 0.6886, 0.9551, 0.3031]])
size is now torch.Size([2, 8])


In [16]:
import numpy as np


In [17]:
a = torch.ones(5)
print(a)

tensor([1., 1., 1., 1., 1.])


## <span style= 'color:cyan'>Changing tensor into numpy arrays</span>

In [18]:
b = a.numpy()
print(b)
print(f'type of b is {type(b)}') 

[1. 1. 1. 1. 1.]
type of b is <class 'numpy.ndarray'>


# <span style='color:cyan'>if the tensor or numpy array is not on the gpu, the object will share same memory location</span>
**if we change a, the changes will apply to b.** 

In [19]:
a.add_(1)
print(f'a is {a}')
print(f'b is {b}') # modify a will also modify b

a is tensor([2., 2., 2., 2., 2.])
b is [2. 2. 2. 2. 2.]


### <span style='color:cyan'> Below code is to work around the same memory pointer problem</span>

In [20]:
device = torch.device('cuda')
if torch.cuda.is_available():
    # create on gpu
    x = torch.ones(5,device=device)
    
    # or create tensor and then move to gpu
    y = torch.ones(5)
    y = y.to(device)
    z = x + y
print(f'z is {z}')

z is tensor([2., 2., 2., 2., 2.], device='cuda:0')


## <span style='color:cyan'>numpy can only handle cpu tensor</span>
**if you want to change the gpu tensor to numpy array, that tensor needs to converted back to cpu tensor as below**

In [21]:
z_numpy = z.to('cpu').numpy()
print(f'numpy version of z is {z_numpy}')


numpy version of z is [2. 2. 2. 2. 2.]


In [22]:
z.add_(1)
print(f'add 1 to z {z}')
print(f'z_numpy is still the same{z_numpy}')

add 1 to z tensor([3., 3., 3., 3., 3.], device='cuda:0')
z_numpy is still the same[2. 2. 2. 2. 2.]


# <span style='color:cyan'> AutoGrad Package for gradient Computation</span>

### **required_grad = True in tensor creation to make sure that tensor will need to calculate the gradient later in optimization step**

In [23]:
x = torch.rand(4,requires_grad=True)
print(x)

tensor([0.6541, 0.2894, 0.9124, 0.9993], requires_grad=True)


In [24]:
y = x + 2
print(y)

tensor([2.6541, 2.2894, 2.9124, 2.9993], grad_fn=<AddBackward0>)


### <span style='color:cyan'> this is what a graph looks like</span>

![image info](computation_graph.png)

In [25]:
z = y * y * 2
print(z)    # multiplication backward
 

tensor([14.0883, 10.4829, 16.9636, 17.9919], grad_fn=<MulBackward0>)


In [26]:
z = z.mean()
print(z)        # mean backward
z.backward()    # dz/dx
print(f'gradient is {x.grad}')   # x.grad looks like the variable y.

tensor(14.8817, grad_fn=<MeanBackward0>)
gradient is tensor([2.6541, 2.2894, 2.9124, 2.9993])


## <span style='color:cyan'>variable 'z' in above cell is scalar which has single value. That's why z.backward() works. If z is not scalar, backward will give you error as below</span>

In [27]:
z = y*y*2
print(f'z has more than one value {z}\n')
try:
    z.backward()
except RuntimeError as e:
    print(f'Error => {e}')

z has more than one value tensor([14.0883, 10.4829, 16.9636, 17.9919], grad_fn=<MulBackward0>)

Error => grad can be implicitly created only for scalar outputs


## <span style = 'color:cyan'>If variable is not a scalar, 'backward()' needs to take a vector. The size of column vector must be equal to the size of variable.</span>

In [28]:
vector = torch.tensor([0.1,1.0,0.001,1],dtype = torch.float32)
print(f'the size of vector is {vector.size()}')
print(f'the size of "z" is {z.size()}')
z.backward(vector)     #dz/dx
print(x.grad)

the size of vector is torch.Size([4])
the size of "z" is torch.Size([4])
tensor([ 3.7157, 11.4471,  2.9240, 14.9966])


# <span style='color:cyan'>When in the training and we want to update the weight,that updating should not be part of the gradient.</span>
## So, we need to freeze it.

### option 1

In [29]:
x = torch.randn(3,requires_grad = True)
print(x)
x.requires_grad_(False)
print(x)

tensor([-0.4487, -0.1933, -0.4826], requires_grad=True)
tensor([-0.4487, -0.1933, -0.4826])


### option 2

In [30]:
x = torch.randn(3,requires_grad = True)
print(x)
y = x.detach()
print(y)

tensor([-0.1669, -1.5944,  1.0159], requires_grad=True)
tensor([-0.1669, -1.5944,  1.0159])


### option 3

## <span style='color:cyan'>The wrapper with torch.no_grad() temporarily sets all of the requires_grad flags to false.</span>

In [31]:
x = torch.randn(3, requires_grad=True)
print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False


## **<span style='color:cyan'>Whenever we call the backward method, the gradient for the tensor will be accumlated into '.grad' attribute. Their values will be summed up.</span>**

In [32]:
weights = torch.ones(4,requires_grad = True)

for epoch in range(3):
    model_output = (weights*3).sum()
    
    model_output.backward()
    
    print(f'for {epoch} iteration {weights.grad}')

for 0 iteration tensor([3., 3., 3., 3.])
for 1 iteration tensor([6., 6., 6., 6.])
for 2 iteration tensor([9., 9., 9., 9.])


### <span style='color:cyan'>To prevent summing up, the gradients must be empited.</span>
####  weights.grad.zero_()

In [33]:
weights = torch.ones(4,requires_grad = True)

for epoch in range(3):
    model_output = (weights*3).sum()
    
    model_output.backward()
    print(f'for {epoch} iteration {weights.grad}')
    
    weights.grad.zero_()

for 0 iteration tensor([3., 3., 3., 3.])
for 1 iteration tensor([3., 3., 3., 3.])
for 2 iteration tensor([3., 3., 3., 3.])


In [34]:
# the above code will be equal to this when we use one of the
# torch optimizer.

optimizer = torch.optim.SGD(weights,lr = 0.01)
optimizer.step()
optimizer.zero_grad()

TypeError: params argument given to the optimizer should be an iterable of Tensors or dicts, but got torch.FloatTensor