In [16]:
import numpy as np
import torch 

In [17]:
x = torch.rand(3,requires_grad=True)
print(x)

tensor([0.8152, 0.0337, 0.0456], requires_grad=True)


In [18]:
y = x +2 
print(y)

tensor([2.8152, 2.0337, 2.0456], grad_fn=<AddBackward0>)


In [22]:
y.backward(torch.ones_like(y))
x.grad

tensor([1., 1., 1.])

In [None]:
z = y*y*2
print(z)

tensor([12.3585, 12.4411, 16.0932], grad_fn=<MulBackward0>)


In [None]:
z = z.mean()
print(z)

tensor(13.6309, grad_fn=<MeanBackward0>)


In [None]:
z.backward()
x.grad

tensor([3.3144, 3.3255, 3.7822])

In [26]:
x = torch.rand(3,requires_grad=True)
print(x)

x.requires_grad_(False)
print(x)

tensor([0.2691, 0.6979, 0.4606], requires_grad=True)
tensor([0.2691, 0.6979, 0.4606])


In [28]:
x = torch.rand(3,requires_grad=True)
print(x)

y = x.detach()
print(y)

tensor([0.2794, 0.3515, 0.1432], requires_grad=True)
tensor([0.2794, 0.3515, 0.1432])


In [30]:
x = torch.randn(3,requires_grad=True)
with torch.no_grad():
    y = x+2 
    print(y)

tensor([3.6148, 1.6176, 1.7307])


In [32]:
weights = torch.ones(4,requires_grad=True)

for eopoch in range(3):
    model_output = (weights*3).sum()
    print(model_output)
    model_output.backward()
    print(weights.grad)

tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor(12., grad_fn=<SumBackward0>)
tensor([6., 6., 6., 6.])
tensor(12., grad_fn=<SumBackward0>)
tensor([9., 9., 9., 9.])


In [33]:
weights = torch.ones(4,requires_grad=True)

for eopoch in range(3):
    model_output = (weights*3).sum()
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


### backpropergation 

* y = a(x)
* z = b(y)

* chain rule : $\frac{\partial z}{\partial x} = \frac{\partial z}{\partial y} \cdot \frac{\partial y}{\partial x}$

* at each node (here, one computation is done) of the computational graph it finds the local gradient 

In [35]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)

w = torch.tensor(1.5,requires_grad=True)

# forward pass
y_hat = w*x
loss = (y_hat - y)**2 
print(loss)

# backward pass 
loss.backward()
w.grad


tensor(0.2500, grad_fn=<PowBackward0>)


tensor(-1.)

### Gradient descent from backpropergation and autograd

* 1. using numpy 
* 2. using pytorch

In [52]:
# f = w*x with w=2 
X = np.array([1,2,3,4],dtype=np.float32)
Y = np.array([2,4,6,8],dtype=np.float32)

W = 0.0 

# model prediction 
def forward(x):
    return W*x

# loss 
def loss(y,y_pred):
    return ((y-y_pred)**2).mean()

# gradient 
# MSE = 1/N * (w*x-y)**2 
# dJ/dw = 1/N* 2x * (w*x -y)

def gradient(x,y,y_pred):
    return np.dot(2*x, y_pred-y).mean()

print(f'Prediction before training:f(5)={forward(5):.3f}')

# training 
lr = 0.01
n_iters = 10 

for epoch in range(n_iters):
    y_pred = forward(X)
    l = loss(Y,y_pred)
    
    dw = gradient(X,Y,y_pred)
    
    W-=lr*dw
    
    if epoch % 1 ==0:
        print(f'epoch {epoch +1}: w = {w:.3f},loss={l:.8f}')
    
    
print(f'Prediction after training:f(5)={forward(5):.3f}')

Prediction before training:f(5)=0.000
epoch 1: w = 1.500,loss=30.00000000
epoch 2: w = 1.500,loss=4.79999924
epoch 3: w = 1.500,loss=0.76800019
epoch 4: w = 1.500,loss=0.12288000
epoch 5: w = 1.500,loss=0.01966083
epoch 6: w = 1.500,loss=0.00314570
epoch 7: w = 1.500,loss=0.00050332
epoch 8: w = 1.500,loss=0.00008053
epoch 9: w = 1.500,loss=0.00001288
epoch 10: w = 1.500,loss=0.00000206
Prediction after training:f(5)=9.999


In [51]:
# f = w*x with w=2 
X = torch.tensor([1,2,3,4],dtype=torch.float32)
Y = torch.tensor([2,4,6,8],dtype=torch.float32)

W = torch.tensor([0.0],dtype=torch.float32,requires_grad=True) 

# model prediction 
def forward(x):
    return W*x

# loss 
def loss(y,y_pred):
    return ((y-y_pred)**2).mean()

''' ### No gradient calculation is needed with pytorch
# gradient 
# MSE = 1/N * (w*x-y)**2 
# dJ/dw = 1/N* 2x * (w*x -y)

def gradient(x,y,y_pred):
    return np.dot(2*x, y_pred-y).mean()  '''

#print(f'Prediction before training:f(5)={forward(5):.3f}')

# training 
lr = 0.01
n_iters = 10 

for epoch in range(n_iters):
    y_pred = forward(X)
    
    l = loss(Y,y_pred)
    
    #dw = gradient(X,Y,y_pred)
    l.backward()
    # W-=lr*dw
    
    with torch.no_grad():
        W-=lr*w.grad
        
    w.grad.zero_()
        
    
    if epoch % 1 ==0:
        print(f'epoch {epoch +1}: w = {w:.3f},loss={l:.8f}')
    
    
#print(f'Prediction after training:f(5)={forward(5):.3f}')

epoch 1: w = 1.500,loss=30.00000000
epoch 2: w = 1.500,loss=30.00000000
epoch 3: w = 1.500,loss=30.00000000
epoch 4: w = 1.500,loss=30.00000000
epoch 5: w = 1.500,loss=30.00000000
epoch 6: w = 1.500,loss=30.00000000
epoch 7: w = 1.500,loss=30.00000000
epoch 8: w = 1.500,loss=30.00000000
epoch 9: w = 1.500,loss=30.00000000
epoch 10: w = 1.500,loss=30.00000000


1. design our model (input size, output size, forward pass)

2. construct loss and optimizer 

3. Training loop

    - forward pass: compute prediction 
    - backward pass: gradients 
    - update the weights 

    

In [12]:
import torch.nn as nn
import torch

X = torch.tensor([1,2,3,4],dtype=torch.float32)
Y = torch.tensor([2,4,6,8],dtype=torch.float32)

W = torch.tensor(0.0,dtype=torch.float32,requires_grad=True) 

# model prediction 
def forward(x):
    return W*x

# loss 
loss = nn.MSELoss()
optimizer = torch.optim.SGD([W],lr=0.01)


#print(f'Prediction before training:f(5)={forward(5):.3f}')

# training 
n_iters = 10 

for epoch in range(n_iters):
    y_pred = forward(X)
    
    l = loss(Y,y_pred)
    
    #dw = gradient(X,Y,y_pred)
    l.backward()
    # W-=lr*dw
    
    optimizer.step()
        
    optimizer.zero_grad()
        
    
    if epoch % 1 ==0:
        print(f'epoch {epoch +1}: w = {W:.3f},loss={l:.8f}')
    


epoch 1: w = 0.300,loss=30.00000000
epoch 2: w = 0.555,loss=21.67499924
epoch 3: w = 0.772,loss=15.66018772
epoch 4: w = 0.956,loss=11.31448650
epoch 5: w = 1.113,loss=8.17471695
epoch 6: w = 1.246,loss=5.90623236
epoch 7: w = 1.359,loss=4.26725292
epoch 8: w = 1.455,loss=3.08308983
epoch 9: w = 1.537,loss=2.22753215
epoch 10: w = 1.606,loss=1.60939169


In [23]:
import torch.nn as nn
import torch

X = torch.tensor([[1],[2],[3],[4]],dtype=torch.float32)
Y = torch.tensor([[2],[4],[6],[8]],dtype=torch.float32)
X_test = torch.tensor([5],dtype=torch.float32)
num_smaples,num_features = X.shape

# model prediction 
model = nn.Linear(num_features,num_features)

# loss 
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.001)


#print(f'Prediction before training:f(5)={forward(5):.3f}')

# training 
n_iters = 1000

for epoch in range(n_iters):
    y_pred = model(X)
    
    l = loss(Y,y_pred)
    
    #dw = gradient(X,Y,y_pred)
    l.backward()
    # W-=lr*dw
    
    optimizer.step()
        
    optimizer.zero_grad()
        
    
    if epoch % 100 ==0:
        [w,b] = model.parameters()
        print(f'epoch {epoch +1}: w = {w[0][0]:.3f},loss={l:.8f}')
    
    
print(f'Prediction after training:f(5)={model(X_test).item():.3f}')

epoch 1: w = -0.378,loss=39.10869217
epoch 101: w = 1.270,loss=1.52484441
epoch 201: w = 1.584,loss=0.21986008
epoch 301: w = 1.651,loss=0.16517991
epoch 401: w = 1.671,loss=0.15413493
epoch 501: w = 1.683,loss=0.14512549
epoch 601: w = 1.692,loss=0.13668759
epoch 701: w = 1.701,loss=0.12874195
epoch 801: w = 1.710,loss=0.12125830
epoch 901: w = 1.719,loss=0.11420970
Prediction after training:f(5)=9.438
