In [33]:
%config IPCompleter.greedy=True

In [2]:
import torch

<span style='color:rgb(127, 255, 212)'>To check my laptop graphic card can use cuda cores.</span>

In [3]:
torch.cuda.is_available()

True

In [4]:
#2 by 3 matrices
x = torch.empty(2,3,dtype=torch.float16)
y = torch.zeros(2,3)
print(x)
print(y)
print(f'the size of the tensor "x" is {x.size()}')

tensor([[0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float16)
tensor([[0., 0., 0.],
        [0., 0., 0.]])
the size of the tensor "x" is torch.Size([2, 3])


In [5]:
x = torch.Tensor([2.5,0.1])
print(x)

tensor([2.5000, 0.1000])


## <span style='color:rgb(127, 255, 212)'>  OPEARATIONS </span>

In [6]:
x = torch.rand(2,2)
y = torch.rand(2,2)
print(f'x is \n {x}')
print(f'y is \n {y}')

x is 
 tensor([[0.2577, 0.6458],
        [0.8148, 0.5784]])
y is 
 tensor([[0.8570, 0.1383],
        [0.0672, 0.9275]])


In [7]:
print(x + y)

tensor([[1.1147, 0.7840],
        [0.8819, 1.5060]])


### <span style='color:rgb(127, 255, 212)'> EVERY FUNCTION THAT HAS A TRAILING UNDERSCORE WILL DO IN PLACE OPERATION.</span> 
<span style='color:rgb(127, 255, 212)'>Below example, the original 'y' is replaced by the value of x + y</span>

In [8]:
y.add_(x)
print(y)

tensor([[1.1147, 0.7840],
        [0.8819, 1.5060]])


In [9]:
x = torch.rand(5,3)
print(x)
print(f"all rows from column 1 {x[:,0]}")

tensor([[0.0727, 0.4526, 0.9927],
        [0.3490, 0.8218, 0.5162],
        [0.5746, 0.3220, 0.3540],
        [0.8988, 0.7974, 0.9805],
        [0.4134, 0.8580, 0.3794]])
all rows from column 1 tensor([0.0727, 0.3490, 0.5746, 0.8988, 0.4134])


In [10]:
print(f'2nd row from all column {x[1,:]}')

2nd row from all column tensor([0.3490, 0.8218, 0.5162])


<span style='color:rgb(127, 255, 212)'>item() method can only be used if you want single element</span>

In [11]:
print(f'only element at 2X2 {x[1,1].item()}')

only element at 2X2 0.8217606544494629


In [12]:
x = torch.rand(4,4)
print(x)

tensor([[0.0684, 0.3391, 0.5893, 0.9557],
        [0.7118, 0.3352, 0.8871, 0.6957],
        [0.6183, 0.5611, 0.7521, 0.9504],
        [0.1122, 0.2501, 0.0379, 0.9491]])


In [13]:
y = x.view(16)
print(y)
print(f'x reshape into size of {y.size()}')

tensor([0.0684, 0.3391, 0.5893, 0.9557, 0.7118, 0.3352, 0.8871, 0.6957, 0.6183,
        0.5611, 0.7521, 0.9504, 0.1122, 0.2501, 0.0379, 0.9491])
x reshape into size of torch.Size([16])


<span style='color:rgb(127, 255, 212)'>pytorch will automatically give you the size when reshaping</span>

In [14]:
y = x.view(-1,8) # or (2,8)
print(y)
print(f'size is now {y.size()}')

tensor([[0.0684, 0.3391, 0.5893, 0.9557, 0.7118, 0.3352, 0.8871, 0.6957],
        [0.6183, 0.5611, 0.7521, 0.9504, 0.1122, 0.2501, 0.0379, 0.9491]])
size is now torch.Size([2, 8])


In [15]:
import numpy as np


In [16]:
a = torch.ones(5)
print(a)

tensor([1., 1., 1., 1., 1.])


## Changing tensor into numpy arrays

In [17]:
b = a.numpy()
print(b)
print(f'type of b is {type(b)}') 

[1. 1. 1. 1. 1.]
type of b is <class 'numpy.ndarray'>


# <span style='color:rgb(127, 255, 212)'>if the tensor or numpy array is not on the gpu, the object will share same memory location</span>
**if we change a, the changes will apply to b.** 

In [18]:
a.add_(1)
print(f'a is {a}')
print(f'b is {b}') # modify a will also modify b

a is tensor([2., 2., 2., 2., 2.])
b is [2. 2. 2. 2. 2.]


### <span style='color:rgb(127, 255, 212)'> Below code is to work around the same memory pointer problem</span>

In [19]:
device = torch.device('cuda')
if torch.cuda.is_available():
    # create on gpu
    x = torch.ones(5,device=device)
    
    # or create tensor and then move to gpu
    y = torch.ones(5)
    y = y.to(device)
    z = x + y
print(f'z is {z}')

z is tensor([2., 2., 2., 2., 2.], device='cuda:0')


## <span style='color:rgb(127, 255, 212)'>numpy can only handle cpu tensor</span>
**if you want to change the gpu tensor to numpy array, that tensor needs to converted back to cpu tensor as below**

In [20]:
z_numpy = z.to('cpu').numpy()
print(f'numpy version of z is {z_numpy}')


numpy version of z is [2. 2. 2. 2. 2.]


In [21]:
z.add_(1)
print(f'add 1 to z {z}')
print(f'z_numpy is still the same{z_numpy}')

add 1 to z tensor([3., 3., 3., 3., 3.], device='cuda:0')
z_numpy is still the same[2. 2. 2. 2. 2.]


# <span style='color:rgb(127, 255, 212)'> AutoGrad Package for gradient Computation</span>

### **required_grad = True in tensor creation to make sure that tensor will need to calculate the gradient later in optimization step**

In [22]:
x = torch.rand(4,requires_grad=True)
print(x)

tensor([0.5834, 0.2641, 0.1396, 0.8350], requires_grad=True)


In [23]:
y = x + 2
print(y)

tensor([2.5834, 2.2641, 2.1396, 2.8350], grad_fn=<AddBackward0>)


### <span style='color:rgb(127, 255, 212)'> this is what a graph looks like</span>

![image info](computation_graph.png)

In [24]:
z = y * y * 2
print(z)    # multiplication backward
 

tensor([13.3480, 10.2520,  9.1556, 16.0747], grad_fn=<MulBackward0>)


In [25]:
z = z.mean()
print(z)        # mean backward
z.backward()    # dz/dx
print(f'gradient is {x.grad}')   # x.grad looks like the variable y.

tensor(12.2076, grad_fn=<MeanBackward0>)
gradient is tensor([2.5834, 2.2641, 2.1396, 2.8350])


## <span style='color:teal'>variable 'z' in above cell is scalar which has single value. That's why z.backward() works. If z is not scalar, backward will give you error as below</span>

In [31]:
z = y*y*2
print(f'z has more than one value {z}\n')
try:
    z.backward()
except RuntimeError as e:
    print(f'Error => {e}')

z has more than one value tensor([13.3480, 10.2520,  9.1556, 16.0747], grad_fn=<MulBackward0>)

Error => grad can be implicitly created only for scalar outputs


## <span style = 'color:teal'>If variable is not a scalar, 'backward()' needs to take a vector. The size of column vector must be equal to the size of variable.</span>

In [32]:
vector = torch.tensor([0.1,1.0,0.001,1],dtype = torch.float32)
print(f'the size of vector is {vector.size()}')
print(f'the size of "z" is {z.size()}')
z.backward(vector)     #dz/dx
print(x.grad)

the size of vector is torch.Size([4])
the size of "z" is torch.Size([4])
tensor([ 4.6501, 20.3766,  2.1567, 25.5152])


### <span style='color:teal'>When in the training and we want to update the weight,that updating should not be part of the gradient.</span>

### option 1

In [37]:
x = torch.randn(3,requires_grad = True)
print(x)
x.requires_grad_(False)
print(x)

tensor([-0.4867, -2.4738,  0.1705], requires_grad=True)
tensor([-0.4867, -2.4738,  0.1705])


### option 2

In [38]:
x = torch.randn(3,requires_grad = True)
print(x)
y = x.detach()
print(y)

tensor([-0.3727, -0.7802, -2.1452], requires_grad=True)
tensor([-0.3727, -0.7802, -2.1452])


### option 3

In [39]:
x = torch.randn(3,requires_grad = True)
print(x)
with torch.no_grad():
    y = x + 2
    print(y)

tensor([-0.6279, -0.2623, -0.8403], requires_grad=True)
tensor([1.3721, 1.7377, 1.1597])


## Whenever we call the backward method, the gradient for the tensor will be accumlated into '.grad' attribute. Their values will be summed up.

In [47]:
weights = torch.ones(4,requires_grad = True)

for epoch in range(3):
    model_output = (weights*3).sum()
    
    model_output.backward()
    
    print(f'for {epoch} iteration {weights.grad}')

for 0 iteration tensor([3., 3., 3., 3.])
for 1 iteration tensor([6., 6., 6., 6.])
for 2 iteration tensor([9., 9., 9., 9.])


### To prevent summing up, the gradients must be empited.
####  weights.grad.zero_()

In [48]:
weights = torch.ones(4,requires_grad = True)

for epoch in range(3):
    model_output = (weights*3).sum()
    
    model_output.backward()
    print(f'for {epoch} iteration {weights.grad}')
    
    weights.grad.zero_()

for 0 iteration tensor([3., 3., 3., 3.])
for 1 iteration tensor([3., 3., 3., 3.])
for 2 iteration tensor([3., 3., 3., 3.])


In [None]:
# the above code will be equal to this when we use one of the
# torch optimizer.

optimizer = torch.optim.SGD(weights,lr = 0.01)
optimizer.step()
optimizer.zero_grad()