# Linear Regression using PyTorch

### Linear Regression for determining the Crop yields

In [1]:
import numpy as np
import torch

In [2]:
# input = temp, rainfall and humidiy
# output = apple, oranges

inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [3]:
# convert numpy data to Torch

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


## Linear regression Model

$ Apple = W_11 * temp + W_12*Rainfall + W_13*Humidity + bias_1$ \
$ Oranges = W_21 * temp + W_22*Rainfall + W_23*Humidity + bias_2$ \

weights = 3 X 2 \ 
data = 3 X 2 \
therefore Matrix multiplication will be:\
                    $ X_{3X2} * W_{2X3}^T + b_2 $ \
We are assigning random weights and biases initally, then it will get adjusted depending on the Loss

In [4]:
# Define Weights

w = torch.randn([2,3], requires_grad = True)
b = torch.randn([2], requires_grad = True)

print('weights = ', w)
print('bias = ', b)

weights =  tensor([[ 0.1147, -0.9016, -0.6377],
        [ 0.4821, -0.3799, -1.5286]], requires_grad=True)
bias =  tensor([ 1.0425, -0.5771], requires_grad=True)


In [5]:
# Define the MODEL

def Reg_model(x):
    return x @ w.t() + b

# '@' represents Matrix Multiplication

In [6]:
# Predict the Model

pred = Reg_model(inputs)
print(pred)

print('')
print(targets)
print('Element wise distance')
print(pred - targets)

tensor([[ -78.4165,  -56.5676],
        [-108.6782,  -87.9682],
        [-146.7853,  -98.1996],
        [ -49.6263,  -24.2991],
        [-122.2398, -110.7842]], grad_fn=<AddBackward0>)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])
Element wise distance
tensor([[-134.4165, -126.5676],
        [-189.6782, -188.9682],
        [-265.7853, -231.1996],
        [ -71.6263,  -61.2991],
        [-225.2398, -229.7841]], grad_fn=<SubBackward0>)


In [7]:
# Negative is not good, convert it into positive

print((pred-targets)**2)

tensor([[18067.8047, 16019.3623],
        [35977.8047, 35708.9805],
        [70641.8125, 53453.2617],
        [ 5130.3223,  3757.5776],
        [50732.9844, 52800.7539]], grad_fn=<PowBackward0>)


In [8]:
# converting the whole data into single value
# this single value represents the total deviation
# Now, we have to reduce this number as minimus as possible

# converting into single number
diff = (pred-targets)
print(torch.sum((diff)**2)/diff.numel())

# .numel() => returns the length of matrix or vector

tensor(34229.0703, grad_fn=<DivBackward0>)


## Loss Function
it is used to improve the model

In [9]:
# MSE Loss
def msee(pred,targets):
    diff = pred - targets
    return torch.sum(diff**2)/diff.numel()

In [10]:
loss = msee(pred, targets)
print(loss)

tensor(34229.0703, grad_fn=<DivBackward0>)


## Gradients
In Pytorch we can auto compute derative w.r.t weights and biases


In [11]:
# Loss is the function of weights and biases
loss.backward()

In [12]:
print(w)
print(w.grad)

tensor([[ 0.1147, -0.9016, -0.6377],
        [ 0.4821, -0.3799, -1.5286]], requires_grad=True)
tensor([[-14608.7734, -17203.1543, -10350.3643],
        [-13731.5049, -16157.0244,  -9859.7812]])


## Gradient Descent

We have to find the lowest Loss - else, if some loss is there, then because of the square term, the loss boosts \
\
**Negative** Gradient(Derivative):\
* increasing value decreases the Loss
* decreasing value increase the Loss \
\
**Positive** Gradient (Derivative):\
* increasing value increase the Loss
* decreasing value decrease the Loss \
\
**Goal**\
To decrease the loss

In [13]:
# running  y.backward() repeatedly, Just take repeated Derivative
# therefore we have to clear the Gradient 
# everytime we go for calculation of new gradient

# w.grad.zero_()
# b.grad.zero_()

print(w.grad)
print(b.grad)

tensor([[-14608.7734, -17203.1543, -10350.3643],
        [-13731.5049, -16157.0244,  -9859.7812]])
tensor([-177.3492, -167.5637])


Adjust weight and biases using **Gradient Descent**
1. Generate Predictions 
2. Calcualte the Loss
3. Compute Gradients w.r.t. weights and bias
4. Adjust the weights by subtracting gradients
5. reset the gradient to zero


In [14]:
# Generate Predictions

prediction = Reg_model(inputs)
print(prediction)

tensor([[ -78.4165,  -56.5676],
        [-108.6782,  -87.9682],
        [-146.7853,  -98.1996],
        [ -49.6263,  -24.2991],
        [-122.2398, -110.7842]], grad_fn=<AddBackward0>)


In [15]:
loss = msee(prediction, targets)
print(loss)

tensor(34229.0703, grad_fn=<DivBackward0>)


In [16]:
loss.backward()
print(w.grad)
print(b.grad)

tensor([[-29217.5469, -34406.3086, -20700.7285],
        [-27463.0098, -32314.0488, -19719.5625]])
tensor([-354.6985, -335.1275])


* We use **torch.no_grad** to indicate to PyTorch that we shouldn't track, calculate or modify gradients while updating the weights and biases.
* We multiply the gradients with a really small number (10^-5 in this case) as learning rate.
* After we have updated the weights, we reset the gradients back to zero, to avoid affecting any future computations.

In [17]:
# Adjust the weights by subtracting small quantity of gradients
# Adjust and Reset Gradients

learning_rate = 1e-5

with torch.no_grad():
    w -= (w.grad * learning_rate)
    b -= (b.grad * learning_rate)
    w.grad.zero_()
    b.grad.zero_()
    
print(w)


tensor([[ 0.4068, -0.5576, -0.4307],
        [ 0.7567, -0.0567, -1.3314]], requires_grad=True)


In [18]:
# w -= 0.01 * w.grad is an in-place operation, so it performs 
# calculation on existing w and updates the value.

# However, w = w - 0.01 * w.grad is not in-place operation, 
# so it creates a new variable w, which does not have requires_grad set and so the error.

In [19]:
print(w)
print(b)

tensor([[ 0.4068, -0.5576, -0.4307],
        [ 0.7567, -0.0567, -1.3314]], requires_grad=True)
tensor([ 1.0460, -0.5738], requires_grad=True)


In [20]:
# Calculate LOSS

prediction = Reg_model(inputs)
loss = msee(prediction, targets)
loss

tensor(15121.5234, grad_fn=<DivBackward0>)

# Train for Multiple Epoch

In [21]:
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [22]:
w = torch.randn([2,3], requires_grad = True)
b = torch.randn([2], requires_grad = True)

print('weights = ', w)
print('bias = ', b)

weights =  tensor([[ 0.0624,  0.0322,  1.9052],
        [-0.4808, -0.4674, -0.9244]], requires_grad=True)
bias =  tensor([-0.4053,  0.3311], requires_grad=True)


In [23]:
prediction = Reg_model(inputs)
loss = msee(prediction, targets)
loss.backward()

print(loss)

tensor(27123.3008, grad_fn=<DivBackward0>)


In [24]:
epoch = 100
lr = 1e-5

for i in range(epoch):
    prediction = Reg_model(inputs)
    loss = msee(prediction, targets)
    loss.backward()
    with torch.no_grad():
        w -= (w.grad*lr)
        b -= (b.grad*lr)
        # b = b - learning_rate * eps  *** gives error
        w.grad.zero_()
        b.grad.zero_()

In [25]:
print(loss)

tensor(235.4134, grad_fn=<DivBackward0>)
tensor([[-0.3422,  0.1560,  1.7287],
        [ 0.2579,  0.8902, -0.0959]], requires_grad=True)
tensor([-0.4096,  0.3420], requires_grad=True)


In [26]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [27]:
prediction

tensor([[ 59.4145,  74.7133],
        [ 92.8368,  96.0235],
        [ 90.8999, 136.4471],
        [ 35.4715,  61.5129],
        [111.9346,  96.8257]], grad_fn=<AddBackward0>)