<a href="https://colab.research.google.com/github/shiva811/Music-Deep-Learning-/blob/master/linear_regression_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import torch

In [10]:
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [11]:
print("input_shape", inputs.shape)

input_shape (5, 3)


In [12]:
# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [13]:
# Convert inputs and targets to tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [14]:
# Weights and biases
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[-1.4307, -0.0283,  0.0461],
        [ 1.1953,  1.5512,  0.3695]], requires_grad=True)
tensor([-1.7402, -1.0374], requires_grad=True)


In [15]:
def model(x):
    return x @ w.t() + b

In [17]:
preds = model(inputs)
print(preds)

tensor([[-106.0911,  206.0339],
        [-131.4684,  267.8827],
        [-127.3224,  332.2377],
        [-147.1800,  201.2521],
        [ -99.9423,  256.2130]], grad_fn=<AddBackward0>)


In [18]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [19]:
# MSE loss
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()
    

In [20]:
# Compute loss
loss = mse(preds, targets)
print(loss)

tensor(33375.5391, grad_fn=<DivBackward0>)


In [21]:
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7fce2ea7bcc0>

In [22]:
loss.backward()
print(w)
print(w.grad)

tensor([[-1.4307, -0.0283,  0.0461],
        [ 1.1953,  1.5512,  0.3695]], requires_grad=True)
tensor([[-16771.3418, -17864.3457, -11064.0439],
        [ 13734.3770,  14146.6172,   8753.5947]])


In [37]:
# Adjust weights & reset gradients
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [38]:
# Let's verify that the loss is actually lower
loss = mse(preds, targets)
print(loss)

tensor(33375.5391, grad_fn=<DivBackward0>)


In [43]:
# Train for 100 epochs
for i in range(100):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()
        

In [44]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(3.6162, grad_fn=<DivBackward0>)


Train the model using gradient descent
As seen above, we reduce the loss and improve our model using the gradient descent optimization algorithm. Thus, we can train the model using the following steps:

Generate predictions

Calculate the loss

Compute gradients w.r.t the weights and biases

Adjust the weights by subtracting a small quantity proportional to the gradient

Reset the gradients to zero

Let's implement the above step by step.

In [None]:
preds = model(inputs)
print(preds)

tensor([[ 51.2508,  91.3178],
        [ 58.1372, 125.5632],
        [ 62.6574, 137.0176],
        [ 79.9491,  85.2801],
        [ 34.4480, 127.4283]], grad_fn=<AddBackward0>)


In [45]:
import torch.nn as nn


In [46]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70], 
                   [74, 66, 43], 
                   [91, 87, 65], 
                   [88, 134, 59], 
                   [101, 44, 37], 
                   [68, 96, 71], 
                   [73, 66, 44], 
                   [92, 87, 64], 
                   [87, 135, 57], 
                   [103, 43, 36], 
                   [68, 97, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119],
                    [57, 69], 
                    [80, 102], 
                    [118, 132], 
                    [21, 38], 
                    [104, 118], 
                    [57, 69], 
                    [82, 100], 
                    [118, 134], 
                    [20, 38], 
                    [102, 120]], 
                   dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [47]:
inputs

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 74.,  66.,  43.],
        [ 91.,  87.,  65.],
        [ 88., 134.,  59.],
        [101.,  44.,  37.],
        [ 68.,  96.,  71.],
        [ 73.,  66.,  44.],
        [ 92.,  87.,  64.],
        [ 87., 135.,  57.],
        [103.,  43.,  36.],
        [ 68.,  97.,  70.]])

In [48]:
from torch.utils.data import TensorDataset


In [49]:
# Define dataset
train_ds = TensorDataset(inputs, targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [50]:
from torch.utils.data import DataLoader


In [51]:
# Define data loader
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

In [71]:
for xb, yb in train_dl:
    print(xb, yb) 
    break

tensor([[ 69.,  96.,  70.],
        [ 91.,  87.,  65.],
        [101.,  44.,  37.],
        [ 73.,  67.,  43.],
        [ 74.,  66.,  43.]]) tensor([[103., 119.],
        [ 80., 102.],
        [ 21.,  38.],
        [ 56.,  70.],
        [ 57.,  69.]])


In [57]:

# Define model
model = nn.Linear(3, 2)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.4949, -0.0528, -0.2190],
        [ 0.1162,  0.1921, -0.5770]], requires_grad=True)
Parameter containing:
tensor([-0.3050, -0.5555], requires_grad=True)


In [58]:
# Parameters
list(model.parameters())

[Parameter containing:
 tensor([[-0.4949, -0.0528, -0.2190],
         [ 0.1162,  0.1921, -0.5770]], requires_grad=True),
 Parameter containing:
 tensor([-0.3050, -0.5555], requires_grad=True)]

In [59]:

# Generate predictions
preds = model(inputs)
preds

tensor([[-49.3814,  -4.0190],
        [-63.9959, -10.0123],
        [-63.1290,   1.8201],
        [-61.1521,  -1.7969],
        [-54.8452, -14.4941],
        [-49.8235,  -4.0949],
        [-64.1622, -10.7815],
        [-63.8429,   1.3592],
        [-60.7099,  -1.7210],
        [-54.5694, -15.1874],
        [-49.5477,  -4.7881],
        [-64.4380, -10.0882],
        [-62.9627,   2.5892],
        [-61.4279,  -1.1037],
        [-54.4031, -14.4182]], grad_fn=<AddmmBackward>)

In [60]:
# Import nn.functional
import torch.nn.functional as F

In [61]:
# Define loss function
loss_fn = F.mse_loss

In [62]:
loss = loss_fn(model(inputs), targets)
print(loss)

tensor(15135.3477, grad_fn=<MseLossBackward>)


In [63]:
# Define optimizer
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

Train the model
We are now ready to train the model. We'll follow the same process to implement gradient descent:

Generate predictions

Calculate the loss

Compute gradients w.r.t the weights and biases

Adjust the weights by subtracting a small quantity proportional to the gradient

Reset the gradients to zero

The only change is that we'll work batches of data instead of processing the entire training data in every iteration. Let's define a utility function fit that trains the model for a given number of epochs.

In [65]:
# Train with batches of data
for xb,yb in train_dl:
    
    # 1. Generate predictions
    pred = model(xb)
    
    # 2. Calculate loss
    loss = loss_fn(pred, yb)
    
    # 3. Compute gradients
    loss.backward()
    
    # 4. Update parameters using gradients
    opt.step()
    
    # 5. Reset the gradients to zero
    opt.zero_grad()
        


In [66]:
loss.item()

1989.483642578125

In [73]:
# Utility function to train the model
def fit(num_epochs, model, loss_fn, opt, train_dl):
    
    # Repeat for given number of epochs
    for epoch in range(num_epochs):
        
        # Train with batches of data
        for xb,yb in train_dl:
            
            # 1. Generate predictions
            pred = model(xb)
            
            # 2. Calculate loss
            loss = loss_fn(pred, yb)
            
            # 3. Compute gradients
            loss.backward()
            
            # 4. Update parameters using gradients
            opt.step()
            
            # 5. Reset the gradients to zero
            opt.zero_grad()
        
        # Print the progress
        if (epoch+1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item())) #since loss is a tensor, in order to get it as a number, we put loss.item()

In [74]:
fit(100, model, loss_fn, opt, train_dl)


Epoch [10/100], Loss: 382.3283
Epoch [20/100], Loss: 295.3376
Epoch [30/100], Loss: 203.4067
Epoch [40/100], Loss: 193.3706
Epoch [50/100], Loss: 105.6775
Epoch [60/100], Loss: 88.7913
Epoch [70/100], Loss: 43.0349
Epoch [80/100], Loss: 55.5265
Epoch [90/100], Loss: 54.1253
Epoch [100/100], Loss: 45.2944
