In [None]:
#!pip install numpy torch

In [None]:
import numpy as np
import torch

inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [None]:
# Convert inputs and targets to tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [None]:
# we used 2x3 for weights because we predicting 2 variables(apples,oranges) and each formala is a set of 3 elements(w11,w12,w13).We represent weight with matrix
#We represent weight with biase as a vector thats why its only number 2 
w = torch.randn(2, 3, requires_grad=True) # w or m derivative , -(2/n)*sum(x*(y-y_predicted))
b = torch.randn(2, requires_grad=True) #b derivative , -(2/n)*sum(y-y_predicted)
print(w)
print(b)

tensor([[-0.8323,  0.1733, -0.9786],
        [ 0.4811,  0.0117, -0.1622]], requires_grad=True)
tensor([3.0281, 0.9385], requires_grad=True)


Our model is simply a function that performs a matrix multiplication of the inputs and the weights w (transposed) and adds the bias b (replicated for each observation).





In [None]:
inputs @ w.t() + b

tensor([[ -88.1964,   29.8719],
        [-120.0883,   35.3724],
        [-102.9160,   34.9591],
        [-110.6206,   44.5173],
        [-106.2630,   23.9079]], grad_fn=<AddBackward0>)

In [None]:
#defining model(linear regression)

def model(x):
  return x @ w.t() +  b

preds = model(inputs)
print(preds)

tensor([[ -88.1964,   29.8719],
        [-120.0883,   35.3724],
        [-102.9160,   34.9591],
        [-110.6206,   44.5173],
        [-106.2630,   23.9079]], grad_fn=<AddBackward0>)


In [None]:
def mse(preds, targets):
    diff = preds - targets
    return torch.sum(diff * diff) / diff.numel()

In [None]:
loss = mse(preds, targets)
print(loss)

tensor(19648.3340, grad_fn=<DivBackward0>)


In [None]:
#compute gradient|  
loss.backward()

In [None]:
#The gradients are stored in the .grad property of the respective tensors. We call it by .grad method
print(w)
print(w.grad) #the value represent the derivative/gradient of the loss w.r.t(with respect to) the weight above👆


tensor([[-0.8323,  0.1733, -0.9786],
        [ 0.4811,  0.0117, -0.1622]], requires_grad=True)
tensor([[-15219.7041, -16577.1230, -10299.3203],
        [ -4645.1226,  -6081.3770,  -3598.0706]])


In [None]:
#Gradient descent -> we are descending along the gradient by substracting a small amount from the gradient
# Adjust weights & reset gradients
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5


In [None]:
print(w)
print(b)

tensor([[-0.6801,  0.3391, -0.8756],
        [ 0.5276,  0.0725, -0.1262]], requires_grad=True)
tensor([3.0299, 0.9391], requires_grad=True)


In [None]:
# Calculate loss
loss = mse(preds, targets)
print(loss)

tensor(19648.3340, grad_fn=<DivBackward0>)


In [None]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


Training the model using gradient descent optimization

In [None]:
#step1: Generate predictions

preds = model(inputs)
print(preds)

tensor([[-61.5489,  38.8852],
        [-85.0571,  47.2544],
        [-61.4861,  49.2369],
        [-84.1558,  53.2021],
        [-72.6360,  35.4704]], grad_fn=<AddBackward0>)


In [None]:
#step2: Calculate the loss
loss = mse(preds, targets)
print(loss)

tensor(13419.7686, grad_fn=<DivBackward0>)


In [None]:
#step3: Compute Gradient
loss.backward()
print(w.grad)
print(b.grad)

tensor([[-12468.2646, -13619.9385,  -8474.5469],
        [ -3712.1094,  -5072.1426,  -2976.7019]])
tensor([-149.1768,  -47.1902])


In [None]:
#step4: Adjust weight and reset gradient
with torch.no_grad():
  w -= w.grad * 1e-5
  b -= b.grad * 1e-5
  w.grad.zero_()
  b.grad.zero_()

In [None]:
print(w,b , sep="\n ")

tensor([[-0.5554,  0.4753, -0.7908],
        [ 0.5647,  0.1232, -0.0964]], requires_grad=True)
 tensor([3.0314, 0.9396], requires_grad=True)


In [None]:
#step5: Calculate the loss to see its reduction
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(9220.5107, grad_fn=<DivBackward0>)


If the learning rate is too low the network might take several iterations and epochs to converge, on the opposite side of things, if the learning rate is too high, there is a risk of overshooting the minimum, and as a result of this our training doesn’t converge.
To reduce the loss further, we can repeat the process of adjusting the weights and biases using the gradients multiple times. Each iteration is called an epoch. Let's train the model for 100 epochs.
#To reduce the loss further, we can repeat the process of adjusting the weights and biases using the gradients multiple times. Each iteration is called an epoch. Let's train the model for 100 epochs.


In [None]:
for epoch in range(1500):
  preds = model(inputs)
  loss = mse(preds,targets)
  loss.backward()
  with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()
    

In [None]:
#calculate new loss
preds = model(inputs)
loss = mse(preds,targets)
print(loss)

tensor(6.5299, grad_fn=<DivBackward0>)


In [None]:
#lets compare prediction from target
preds

tensor([[ 57.9070,  70.6387],
        [ 79.9339,  99.1284],
        [122.6797, 135.9380],
        [ 22.1583,  37.8418],
        [ 97.9371, 116.1582]], grad_fn=<AddBackward0>)

In [None]:
#
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

Linear regression using PyTorch built-ins

In [None]:
import torch.nn as nn #utility class for building neural network
import numpy as np

inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70], 
                   [74, 66, 43], 
                   [91, 87, 65], 
                   [88, 134, 59], 
                   [101, 44, 37], 
                   [68, 96, 71], 
                   [73, 66, 44], 
                   [92, 87, 64], 
                   [87, 135, 57], 
                   [103, 43, 36], 
                   [68, 97, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119],
                    [57, 69], 
                    [80, 102], 
                    [118, 132], 
                    [21, 38], 
                    [104, 118], 
                    [57, 69], 
                    [82, 100], 
                    [118, 134], 
                    [20, 38], 
                    [102, 120]], 
                   dtype='float32')


In [None]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

print(inputs,targets, sep='\n')

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 74.,  66.,  43.],
        [ 91.,  87.,  65.],
        [ 88., 134.,  59.],
        [101.,  44.,  37.],
        [ 68.,  96.,  71.],
        [ 73.,  66.,  44.],
        [ 92.,  87.,  64.],
        [ 87., 135.,  57.],
        [103.,  43.,  36.],
        [ 68.,  97.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])


In [None]:
#for large dataset we train the model in batches other than that the dataset wont fit in memory or it will slow you down
from torch.utils.data import TensorDataset, DataLoader
#define data set
train_ds = TensorDataset(inputs,targets)
train_ds[:3]


(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [None]:
batch_size = 5
train_dl = DataLoader(train_ds,batch_size , shuffle=True)#Shuffling helps randomize the input to the optimization algorithm, leading to a faster reduction in the loss.
for xb, yb in train_dl:
  print(xb,yb,sep='\n')
  break

tensor([[101.,  44.,  37.],
        [ 92.,  87.,  64.],
        [ 88., 134.,  59.],
        [ 91.,  87.,  65.],
        [ 69.,  96.,  70.]])
tensor([[ 21.,  38.],
        [ 82., 100.],
        [118., 132.],
        [ 80., 102.],
        [103., 119.]])


In [None]:
model = nn.Linear(3,2)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.3394,  0.0763, -0.1223],
        [-0.1840, -0.2718, -0.0303]], requires_grad=True)
Parameter containing:
tensor([-0.3469,  0.1289], requires_grad=True)


In [None]:
list(model.parameters())#help us see the list of weights and bias in our model. will see in the essence if we have multiple models and they contain multiple w&b

[Parameter containing:
 tensor([[-0.3394,  0.0763, -0.1223],
         [-0.1840, -0.2718, -0.0303]], requires_grad=True),
 Parameter containing:
 tensor([-0.3469,  0.1289], requires_grad=True)]

In [None]:
#predictions
preds = model(inputs)
preds

tensor([[-25.2689, -32.8166],
        [-32.3439, -42.4726],
        [-26.7405, -54.0555],
        [-36.2102, -31.4495],
        [-25.0000, -40.7798],
        [-25.6847, -32.7289],
        [-32.5426, -42.2311],
        [-27.2022, -54.2699],
        [-35.7945, -31.5372],
        [-24.7829, -40.6260],
        [-25.4676, -32.5751],
        [-32.7597, -42.3849],
        [-26.5418, -54.2970],
        [-36.4273, -31.6032],
        [-24.5842, -40.8675]], grad_fn=<AddmmBackward>)

In [None]:
#loss function by pytorch
import torch.nn.functional as F #this function contains a lotta functions like activation function, loss function

In [None]:
loss_fn = F.mse_loss
loss = loss_fn(model(inputs), targets)
loss

tensor(15663.5732, grad_fn=<MseLossBackward>)

In [19]:
#Optimizer/Gradient Descent(to perform update w and biase authomatically)
#sgd=stochastic gradient descent. the descent is performed in randomm instead of group
#from keras import optimizers
#sgd = optimizers.SGD(lr=1)
opt = torch.optim.SGD(model.parameters(),lr=1e-5)

## Train the model

We are now ready to train the model. We'll follow the same process to implement gradient descent:

1. Generate predictions

2. Calculate the loss

3. Compute gradients w.r.t the weights and biases

4. Adjust the weights by subtracting a small quantity proportional to the gradient

5. Reset the gradients to zero

The only change is that we'll work batches of data instead of processing the entire training data in every iteration. Let's define a utility function `fit` that trains the model for a given number of epochs.

In [33]:
def fit(num_epochs, model, loss_fn, opt, train_dl ):

  for epoch in range(num_epochs):

     # Train with batches of data
        for xb,yb in train_dl:

          #1.generate prediction
          preds = model(xb)

          #2. calculate loss
          loss = loss_fn(preds, yb)

          #3.Compute gradients 
          loss.backward()

          #4.update parameters(weights & biases). instead of doing torch.no_grad etc 
          opt.step()

          #5.Reset the gradients to zero 
          opt.zero_grad()

        if (epoch+1) % 10 == 0:
          print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))




In [34]:
fit(101, model,loss_fn, opt, train_dl )


Epoch [10/101], Loss: 1.4160
Epoch [20/101], Loss: 1.6135
Epoch [30/101], Loss: 1.2748
Epoch [40/101], Loss: 0.9689
Epoch [50/101], Loss: 1.8887
Epoch [60/101], Loss: 0.9176
Epoch [70/101], Loss: 1.6120
Epoch [80/101], Loss: 0.9516
Epoch [90/101], Loss: 1.4199
Epoch [100/101], Loss: 1.2017


In [35]:
preds = model(inputs)
preds

tensor([[ 56.8944,  70.4441],
        [ 81.8240, 100.3168],
        [118.6147, 133.2373],
        [ 20.9183,  37.8712],
        [101.3823, 118.0720],
        [ 55.6459,  69.3602],
        [ 81.6436, 100.3729],
        [118.8922, 133.8163],
        [ 22.1668,  38.9550],
        [102.4503, 119.2121],
        [ 56.7140,  70.5002],
        [ 80.5756,  99.2329],
        [118.7951, 133.1811],
        [ 19.8503,  36.7311],
        [102.6307, 119.1559]], grad_fn=<AddmmBackward>)

In [36]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])

In [39]:
model(torch.tensor([[75,63,44.]]))

tensor([[53.3639, 67.5291]], grad_fn=<AddmmBackward>)