
# Tensor Basics

In [2]:
import torch                                    
#Tensors are simply mathematical objects that can be used to describe physical properties, just like scalars and vectors. In 
#fact tensors are merely a generalisation of scalars and vectors; a scalar is a zero rank tensor, and a vector is a first rank 
#tensor.
x = torch.empty(1)
print(x)

tensor([0.])


In [3]:
torch.empty(4)     # 1D vector with 4 elements

tensor([ 1.4013e-45,  0.0000e+00, -8.8451e+18,  4.5912e-41])

In [5]:
print(torch.empty(2,3))         # 2D 
print(torch.empty(2,2,4))       # 3D
print(torch.empty(2,2,2,2))     # 4D


tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.]]])
tensor([[[[1.0102e-38, 1.0286e-38],
          [1.0194e-38, 9.6429e-39]],

         [[9.2755e-39, 9.1837e-39],
          [9.3674e-39, 1.0745e-38]]],


        [[[1.0653e-38, 9.5510e-39],
          [1.0561e-38, 1.0194e-38]],

         [[1.1112e-38, 1.0561e-38],
          [9.9184e-39, 1.0653e-38]]]])


In [6]:
torch.rand(2,3)

tensor([[0.4836, 0.4567, 0.4983],
        [0.8350, 0.3400, 0.7881]])

In [7]:
torch.zeros(4,4)                     # can use like numpy

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [8]:
torch.ones(3,3)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [14]:
x = torch.ones(2,2, dtype = torch.float16)
print(x)

tensor([[1., 1.],
        [1., 1.]], dtype=torch.float16)


In [16]:
x = torch.ones(2,2, dtype = torch.double)
print(x)

tensor([[1., 1.],
        [1., 1.]], dtype=torch.float64)


In [1]:
import torch
x = torch.ones(2,2, dtype = torch.int)
x

tensor([[1, 1],
        [1, 1]], dtype=torch.int32)

In [17]:
print(x.size())

torch.Size([2, 2])


In [19]:
#we can also create a tensor
torch.tensor([3.6, 5, 6])

tensor([3.6000, 5.0000, 6.0000])

In [2]:
import torch
x = torch.rand(2,2)
y = torch.rand(2,2)
print(x)
print(y)

tensor([[0.9504, 0.6960],
        [0.9970, 0.3403]])
tensor([[0.4174, 0.1551],
        [0.2147, 0.8817]])


In [3]:
z = x + y 
z

tensor([[1.3678, 0.8511],
        [1.2117, 1.2220]])

In [5]:
torch.add(x,y)

tensor([[1.3678, 0.8511],
        [1.2117, 1.2220]])

In [6]:
x = torch.rand(2,2)
y = torch.rand(2,2)
y.add_(x)                            #every function having an underscore will perform an inplace operation in pytorch

tensor([[0.9663, 0.6606],
        [1.0507, 0.7550]])

In [9]:
#similarly
torch.sub(x,y)
y.sub_(x)

tensor([[0.2955, 0.2196],
        [0.2037, 0.1249]])

In [13]:
print(torch.mul(x,y))
print(y.mul_(x))

tensor([[0.0892, 0.0188],
        [0.1238, 0.0312]])
tensor([[0.0892, 0.0188],
        [0.1238, 0.0312]])


In [14]:
torch.div(x,y)

tensor([[ 7.5210, 23.4152],
        [ 6.8424, 20.1642]])

In [18]:
x = torch.rand(5,4)                  #slicing
print(x)
print(x[:,0])
print(x[1,:])
print(x[1,1])

tensor([[0.4045, 0.9259, 0.0204, 0.1073],
        [0.2326, 0.4080, 0.6381, 0.0708],
        [0.8217, 0.5334, 0.9933, 0.0996],
        [0.3481, 0.3111, 0.7953, 0.3522],
        [0.1089, 0.1623, 0.3133, 0.7899]])
tensor([0.4045, 0.2326, 0.8217, 0.3481, 0.1089])
tensor([0.2326, 0.4080, 0.6381, 0.0708])
tensor(0.4080)


In [20]:
#reshaping
x = torch.rand(4,4)
print(x)
print(x.view(16))        # 1D

tensor([[0.9153, 0.4827, 0.2136, 0.8976],
        [0.6122, 0.7277, 0.4785, 0.1733],
        [0.8557, 0.8053, 0.1484, 0.5252],
        [0.7970, 0.8413, 0.4391, 0.9449]])
tensor([0.9153, 0.4827, 0.2136, 0.8976, 0.6122, 0.7277, 0.4785, 0.1733, 0.8557,
        0.8053, 0.1484, 0.5252, 0.7970, 0.8413, 0.4391, 0.9449])


In [35]:
#we can resize the tensors
x = torch.rand(4,4)
print(x)
print(x.view(-1,8))
x.view(-1,8).size()

tensor([[0.4443, 0.6559, 0.4730, 0.7609],
        [0.9273, 0.6397, 0.3965, 0.9528],
        [0.9079, 0.2264, 0.5950, 0.7351],
        [0.4473, 0.9228, 0.6103, 0.2132]])
tensor([[0.4443, 0.6559, 0.4730, 0.7609, 0.9273, 0.6397, 0.3965, 0.9528],
        [0.9079, 0.2264, 0.5950, 0.7351, 0.4473, 0.9228, 0.6103, 0.2132]])


torch.Size([2, 8])

In [39]:
# onverting from numpy to tensor or vice versa
import torch
import numpy as np
a = torch.ones(5)
print(a)
b = a.numpy()
print(b)
print(type(b))   
# note down that if the tensors are on the cpu and not the gpu then both the tensors will share the same memory location

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
<class 'numpy.ndarray'>


In [40]:
a.add_(1)
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [42]:
a = np.ones(4)
print(a)
b = torch.from_numpy(a)
print(b)
a += 1
print(a)
print(b)

[1. 1. 1. 1.]
tensor([1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2.]
tensor([2., 2., 2., 2.], dtype=torch.float64)


In [2]:
import numpy as np
import torch

if torch.cuda.is_available():
    device = torch.device('cuda')
    x = torch.ones(5,device = device)
    y = torch.ones(5)
    y = y.to(device)
    z = x + y
    z = z.to('cpu')
    print(z)

tensor([2., 2., 2., 2., 2.])


In [4]:
x = torch.ones(5, requires_grad = True)   #so this will tell pytorch to calculate the gradients later for this tensor for 
                                        #optimization steps, by default it is false
print(x)

tensor([1., 1., 1., 1., 1.], requires_grad=True)


## Autograd

In [6]:
#Autograd package is used to calculate the gradients which is essential for model optimization
import torch

x = torch.randn(3, requires_grad = True)
print(x)

tensor([-0.7484,  0.0254, -1.1034], requires_grad=True)


In [7]:
#to calculate gradients there will be forward pass to calculate y here and since requires_grad is specified as True pytorch
#automatically provides a function to calculate the gradients using the back propagation method. It contains an attribute 
#namely grad_fn and performs the operation to calculate the gradients of y wrt x (dy/dx), since it as an addition operation 
#it is shown as AddBackward in the output
y = x+2
print(y)

tensor([1.2516, 2.0254, 0.8966], grad_fn=<AddBackward0>)


In [8]:
z = y*y*2
z

tensor([3.1329, 8.2042, 1.6078], grad_fn=<MulBackward0>)

In [10]:
z = y.mean()
z

tensor(1.3912, grad_fn=<MeanBackward0>)

In [14]:
#the only thing that must be done to calculate gradient is 
z.backward()  #dz/dx
print(x.grad)

tensor([1.3333, 1.3333, 1.3333])


In [19]:
import torch                  #if don't specify the argument

x = torch.randn(3, requires_grad = False)
print(x)
y = x+2
print(y)
z = y*y*2
z = z.mean()
print(z)
z.backward()
print(x.grad)

tensor([ 0.5682,  1.6061, -0.5940])
tensor([2.5682, 3.6061, 1.4060])
tensor(14.3841)


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [21]:
import torch                #vector jacobian product

x = torch.randn(3, requires_grad = True)
print(x)
y = x+2
print(y)
z = y*y*2
#z = z.mean()
v = torch.tensor([0.1,1.0,2.0], dtype = torch.float32)
print(z)
z.backward(v)
print(x.grad)


tensor([-1.3998, -2.0988,  0.5052], requires_grad=True)
tensor([ 0.6002, -0.0988,  2.5052], grad_fn=<AddBackward0>)
tensor([ 0.7204,  0.0195, 12.5521], grad_fn=<MulBackward0>)
tensor([ 0.2401, -0.3952, 20.0417])


In [27]:
import torch             

x = torch.randn(3, requires_grad = True)
print(x)
#we should also know to prevent tracking gradients if we don't need it 
# x.requires_grad_(false)
# x.detach()                 #it will create a new tensor that doesn't require a gradient
# with torch.no_grad():
x.requires_grad_(False)
print(x)

y = x.detach()
print(y)

with torch.no_grad():
    y = x + 2
    print(y)
    


tensor([0.1927, 0.4535, 0.3259], requires_grad=True)
tensor([0.1927, 0.4535, 0.3259])
tensor([0.1927, 0.4535, 0.3259])
tensor([2.1927, 2.4535, 2.3259])
tensor([2.1927, 2.4535, 2.3259])


In [6]:
import torch
weights = torch.ones(4, requires_grad = True)
for epoch in range(3):
    model_output = (weights*4).sum()
    model_output.backward()
    print(weights.grad)
    
    weights.grad.zero_()                        #to get back the gradients to initial values


tensor([4., 4., 4., 4.])
tensor([4., 4., 4., 4.])
tensor([4., 4., 4., 4.])


In [None]:
#we have a pytorvh buily in optimizer
import torch
weights = torch.ones(3, requires_grad = True)

optimizer = torch.optim.SGD(weights, lr=0.01)      #SGD = Stochastic Gradient Descent, lr = learning rate
optimizer.step()
optimizer.zero_grad()

In [1]:
import torch
weights = torch.ones(4, requires_grad = True)

z.backward()
weights.grad.zero_()


NameError: name 'z' is not defined

## Backpropagation

In [5]:
#we must know the chain rule for backpropagation and we calculate the local gradients and lastly calculate the loss function
# which we want to minimize
#This whole process consists of 3 steps in short
# 1) forward pass: compute loss
# 2) compute local gradients
# 3) backward pass: compute dLoss/dWeights using the chain rule  (we compute the gradient of the loss) 
#example x=1, y=2, w=1
import torch
x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad = True)

#forwad pass and compute the loss
y_cap = w*x
loss = (y_cap - y)**2

print(loss)

#backwardpass  (pytorch automatically computes the local gradients for us and also the backward pass)
loss.backward()
print(w.grad)

### update weights
### next forward and backwardpass

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


## Gradient Descent using Autograd

In [5]:
# we can optimize our model by calculating automatic gradient computation using the autograd package    
# 1) Prediction : pytorch model
# 2) Gradients computation : autograd
# 3) Loss Computation : pytorch loss
# 4) Parameter updates : pytorch optimizer

#step 1
import numpy as np

#f = w * x   
#f = 2 * x
X = np.array([1,2,3,4], dtype = np.float32)
Y = np.array([2,4,6,8],dtype = np.float32)

w = 0.0    #initialising weights

#Model prediction
def forward(x):
    return w*x

#loss = MSE
def loss(y,y_predicted):
    return ((y_predicted - y)**2).mean()
    
#gradient
#MSE = 1/N * (w*x-y)**2
# dJ/dw = 1/N 2x (w*x-y)
def gradient(x,y,y_predicted):
    return np.dot(2*x, y_predicted-y).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

#training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    #loss
    l = loss(Y,y_pred)
    
    #gradients
    dw = gradient(X,Y,y_pred)
    
    #update weights
    w -= learning_rate*dw
    
    if epoch % 2 == 0:
        print(f'epoch{epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')        


#here we calculate every step manually

Prediction before training: f(5) = 0.000
epoch1: w = 1.200, loss = 30.00000000
epoch3: w = 1.872, loss = 0.76800019
epoch5: w = 1.980, loss = 0.01966083
epoch7: w = 1.997, loss = 0.00050332
epoch9: w = 1.999, loss = 0.00001288
epoch11: w = 2.000, loss = 0.00000033
epoch13: w = 2.000, loss = 0.00000001
epoch15: w = 2.000, loss = 0.00000000
epoch17: w = 2.000, loss = 0.00000000
epoch19: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


In [6]:
# we can optimize our model by calculating automatic gradient computation using the autograd package
# 1) Prediction : pytorch model
# 2) Gradients computation : autograd
# 3) Loss Computation : pytorch loss
# 4) Parameter updates : pytorch optimizer

#step 2
import torch

#f = w * x   
#f = 2 * x
X = torch.tensor([1,2,3,4], dtype = torch.float32)
Y = torch.tensor([2,4,6,8], dtype = torch.float32)

w = torch.tensor(0.0, dtype = torch.float32, requires_grad = True)    #initialising weights

#Model prediction
def forward(x):
    return w*x

#loss = MSE
def loss(y,y_predicted):
    return ((y_predicted - y)**2).mean()
    

print(f'Prediction before training: f(5) = {forward(5):.3f}')

#training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    #loss
    l = loss(Y,y_pred)
    
    #gradients = backward pass
    l.backward()  #dl/dw
    
    #update weights
    with torch.no_grad():
        w -= learning_rate * w.grad
    
    #zero gradients
    w.grad.zero_()
    
    
    if epoch % 2 == 0:
        print(f'epoch{epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')        



Prediction before training: f(5) = 0.000
epoch1: w = 0.300, loss = 30.00000000
epoch3: w = 0.772, loss = 15.66018772
epoch5: w = 1.113, loss = 8.17471695
epoch7: w = 1.359, loss = 4.26725292
epoch9: w = 1.537, loss = 2.22753215
epoch11: w = 1.665, loss = 1.16278565
epoch13: w = 1.758, loss = 0.60698116
epoch15: w = 1.825, loss = 0.31684780
epoch17: w = 1.874, loss = 0.16539653
epoch19: w = 1.909, loss = 0.08633806
Prediction after training: f(5) = 9.612


In [7]:
# we can optimize our model by calculating automatic gradient computation using the autograd package
# 1) Prediction : pytorch model
# 2) Gradients computation : autograd
# 3) Loss Computation : pytorch loss
# 4) Parameter updates : pytorch optimizer

#step 2
import torch

#f = w * x   
#f = 2 * x
X = torch.tensor([1,2,3,4], dtype = torch.float32)
Y = torch.tensor([2,4,6,8], dtype = torch.float32)

w = torch.tensor(0.0, dtype = torch.float32, requires_grad = True)    #initialising weights

#Model prediction
def forward(x):
    return w*x

#loss = MSE
def loss(y,y_predicted):
    return ((y_predicted - y)**2).mean()
    

print(f'Prediction before training: f(5) = {forward(5):.3f}')

#training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    #loss
    l = loss(Y,y_pred)
    
    #gradients = backward pass
    l.backward()  #dl/dw
    
    #update weights
    with torch.no_grad():
        w -= learning_rate * w.grad
    
    #zero gradients
    w.grad.zero_()
    
    
    if epoch % 10 == 0:
        print(f'epoch{epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')   

Prediction before training: f(5) = 0.000
epoch1: w = 0.300, loss = 30.00000000
epoch11: w = 1.665, loss = 1.16278565
epoch21: w = 1.934, loss = 0.04506890
epoch31: w = 1.987, loss = 0.00174685
epoch41: w = 1.997, loss = 0.00006770
epoch51: w = 1.999, loss = 0.00000262
epoch61: w = 2.000, loss = 0.00000010
epoch71: w = 2.000, loss = 0.00000000
epoch81: w = 2.000, loss = 0.00000000
epoch91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


## Training pipeline
Model / Loss / optimizer

In [7]:
#The general training pipeline in pytorch
# 1) Design Model (input, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training loop
# - forward pass: compute prediction
# - backward pass: gradients
# - update weights

#step 3
import torch
import torch.nn as nn


#f = w * x   
#f = 2 * x
X = torch.tensor([1,2,3,4], dtype = torch.float32)
Y = torch.tensor([2,4,6,8], dtype = torch.float32)

w = torch.tensor(0.0, dtype = torch.float32, requires_grad = True)    #initialising weights

#Model prediction
def forward(x):
    return w*x

    

print(f'Prediction before training: f(5) = {forward(5):.3f}')

#training
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    #loss
    l = loss(Y,y_pred)
    
    #gradients = backward pass
    l.backward()  #dl/dw
    
    #update weights
    optimizer.step()
    
    #zero gradients
    optimizer.zero_grad()
    
    
    if epoch % 10 == 0:
        print(f'epoch{epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')   

Prediction before training: f(5) = 0.000
epoch1: w = 0.300, loss = 30.00000000
epoch11: w = 1.665, loss = 1.16278565
epoch21: w = 1.934, loss = 0.04506890
epoch31: w = 1.987, loss = 0.00174685
epoch41: w = 1.997, loss = 0.00006770
epoch51: w = 1.999, loss = 0.00000262
epoch61: w = 2.000, loss = 0.00000010
epoch71: w = 2.000, loss = 0.00000000
epoch81: w = 2.000, loss = 0.00000000
epoch91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


In [16]:
#step 4
import torch
import torch.nn as nn


#f = w * x   
#f = 2 * x
X = torch.tensor([[1],[2],[3],[4]], dtype = torch.float32)          #we need to keep the inputs in 2D when we are creating 
                                                                    #the model using torch and here the rows represent number
                                                                    #of samples    
Y = torch.tensor([[2],[4],[6],[8]], dtype = torch.float32) 
X_test = torch.tensor([5], dtype = torch.float32)

n_samples,n_features = X.shape
print(n_samples,n_features)

input_size = n_features
output_size = n_features

model = nn.Linear(input_size, output_size)

    

print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

#training
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)
    
    #loss
    l = loss(Y,y_pred)
    
    #gradients = backward pass
    l.backward()  #dl/dw
    
    #update weights
    optimizer.step()
    
    #zero gradients
    optimizer.zero_grad()
    
    
    if epoch % 10 == 0:
        [w, b] = model.parameters()
        print(f'epoch{epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')   

4 1
Prediction before training: f(5) = 2.167
epoch1: w = 0.798, loss = 20.34970093
epoch11: w = 1.830, loss = 0.52759916
epoch21: w = 1.995, loss = 0.01468987
epoch31: w = 2.021, loss = 0.00135930
epoch41: w = 2.025, loss = 0.00095737
epoch51: w = 2.025, loss = 0.00089330
epoch61: w = 2.024, loss = 0.00084110
epoch71: w = 2.023, loss = 0.00079213
epoch81: w = 2.023, loss = 0.00074603
epoch91: w = 2.022, loss = 0.00070260
Prediction after training: f(5) = 10.044


In [17]:
#step 4
import torch
import torch.nn as nn


#f = w * x   
#f = 2 * x
X = torch.tensor([[1],[2],[3],[4]], dtype = torch.float32)          #we need to keep the inputs in 2D when we are creating 
                                                                    #the model using torch and here the rows represent number
                                                                    #of samples    
Y = torch.tensor([[2],[4],[6],[8]], dtype = torch.float32) 
X_test = torch.tensor([5], dtype = torch.float32)

n_samples,n_features = X.shape
print(n_samples,n_features)

input_size = n_features
output_size = n_features

#model = nn.Linear(input_size, output_size)

class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        #define layers
        self.lin = nn.Linear(input_dim, output_dim)

print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

#training
learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)
    
    #loss
    l = loss(Y,y_pred)
    
    #gradients = backward pass
    l.backward()  #dl/dw
    
    #update weights
    optimizer.step()
    
    #zero gradients
    optimizer.zero_grad()
    
    
    if epoch % 10 == 0:
        [w, b] = model.parameters()
        print(f'epoch{epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(5) = {model(X_test).item():.3f}') 

4 1
Prediction before training: f(5) = 10.044
epoch1: w = 2.021, loss = 0.00066171
epoch11: w = 2.021, loss = 0.00062320
epoch21: w = 2.020, loss = 0.00058692
epoch31: w = 2.020, loss = 0.00055276
epoch41: w = 2.019, loss = 0.00052058
epoch51: w = 2.018, loss = 0.00049028
epoch61: w = 2.018, loss = 0.00046174
epoch71: w = 2.017, loss = 0.00043487
epoch81: w = 2.017, loss = 0.00040955
epoch91: w = 2.016, loss = 0.00038572
Prediction after training: f(5) = 10.033
