## References
- Machine Learning with Python Cookbook, 2nd Edition, Kyle Gallatin, Chris Albon, O'Reilly Media, Inc.
- http://alexlenail.me/NN-SVG/index.html
- https://machinelearningmastery.com/choose-an-activation-function-for-deep-learning/

In [1]:
# We import all the necessary libraries at the beginning
import torch
import torch.nn as nn 
import numpy as np 
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import RMSprop 
from sklearn.datasets import make_regression 
from sklearn.model_selection import train_test_split

In [2]:
print(torch.__version__)

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

# Additional Info when using CUDA
if device.type == 'cuda':
    print(torch.cuda.is_available())
    print(torch.cuda.device_count())
    print(torch.cuda.device(0))
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

2.0.1+cu118
Using device: cuda

True
1
<torch.cuda.device object at 0x000001BAAB54BA00>
NVIDIA GeForce RTX 3060 Laptop GPU
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


### What is CUDA?
CUDA is a programming model and computing toolkit developed by NVIDIA. It enables you to perform compute-intensive operations faster by parallelizing tasks across GPUs. CUDA is the dominant API used for deep learning although other options are available, such as OpenCL. PyTorch provides support for CUDA in the torch.

In [3]:
# We generate a sample regression dataset
features, target = make_regression(n_features = 5, n_samples = 1000)
features_train, features_test, target_train, target_test = train_test_split(features, target, test_size = 0.1, random_state = 1)

In [4]:
# Print the type and shape of the dataset
print('Training feature set -> type - {}, shape - {}'.format(type(features_train), features_train.shape))
print(features_train[0])

Training feature set -> type - <class 'numpy.ndarray'>, shape - (900, 5)
[ 1.04321313 -1.02985758  1.23174312  1.18357504 -1.01442013]


In [5]:
# Print the type and shape of the dataset
print('Test feature set -> type - {}, shape - {}'.format(type(features_test), features_test.shape))
print(features_test[0])

Test feature set -> type - <class 'numpy.ndarray'>, shape - (100, 5)
[-0.5651733   0.52641371  0.21278937 -0.12444013  0.11151893]


In [6]:
# Print the type and shape of the dataset
print('Training target set -> type - {}, shape - {}'.format(type(target_train), target_train.shape))
print(target_train[0])

Training target set -> type - <class 'numpy.ndarray'>, shape - (900,)
-27.461410879784403


In [7]:
# Print the type and shape of the dataset
print('Test target set -> type - {}, shape - {}'.format(type(target_test), target_test.shape))
print(target_test[0])

Test target set -> type - <class 'numpy.ndarray'>, shape - (100,)
28.258371309387496


In [8]:
# To work with Pytorch, we need to convert the ndarrays to tensors
x_train = torch.from_numpy(features_train).float() 
y_train = torch.from_numpy(target_train).float().view(-1,1) 
x_test = torch.from_numpy(features_test).float()
y_test = torch.from_numpy(target_test).float().view(-1,1)

In [9]:
# Print the type and shape of the converted dataset
print('Training feature set -> type - {}, shape - {}, is CUDA - {}'.format(type(x_train), x_train.shape, x_train.is_cuda))
print(x_train[0])

Training feature set -> type - <class 'torch.Tensor'>, shape - torch.Size([900, 5]), is CUDA - False
tensor([ 1.0432, -1.0299,  1.2317,  1.1836, -1.0144])


In [10]:
# Print the type and shape of the converted dataset
print('Test feature set -> type - {}, shape - {}, is CUDA - {}'.format(type(x_test), x_test.shape, x_test.is_cuda))
print(x_test[0])

Test feature set -> type - <class 'torch.Tensor'>, shape - torch.Size([100, 5]), is CUDA - False
tensor([-0.5652,  0.5264,  0.2128, -0.1244,  0.1115])


In [11]:
# Print the type and shape of the converted dataset
print('Train target set -> type - {}, shape - {}, is CUDA - {}'.format(type(y_train), y_train.shape, y_train.is_cuda))
print(y_train[0])

Train target set -> type - <class 'torch.Tensor'>, shape - torch.Size([900, 1]), is CUDA - False
tensor([-27.4614])


In [12]:
# Print the type and shape of the converted dataset
print('Test target set -> type - {}, shape - {}, is CUDA - {}'.format(type(y_test), y_test.shape, y_test.is_cuda))
print(y_test[0])

Test target set -> type - <class 'torch.Tensor'>, shape - torch.Size([100, 1]), is CUDA - False
tensor([28.2584])


In [13]:
# Now, send all the datasets to GPU, if available
x_train = x_train.to(device)
x_test = x_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)

# Print the type and shape of the converted dataset
print('Test target set -> type - {}, shape - {}, is CUDA - {}'.format(type(y_test), y_test.shape, y_test.is_cuda))
print(y_test[0])

Test target set -> type - <class 'torch.Tensor'>, shape - torch.Size([100, 1]), is CUDA - True
tensor([28.2584], device='cuda:0')


In [14]:
# Define the Neural Netowrk to solve this regression problem
class RegressorNeuralNet(nn.Module): 
    def __init__(self): 
        super(RegressorNeuralNet, self).__init__()
        self.fc1 = nn.Linear(5, 10) # in_features=5, out_features=10
        self.fc2 = nn.Linear(10, 10) # in_features=10, out_features=10
        self.fc3 = nn.Linear(10, 1)  # in_features=10, out_features=1
        
    def forward(self, x): 
        x = nn.functional.relu(self.fc1(x))    # We used RELU as the activation function
        x = nn.functional.relu(self.fc2(x))    # We used RELU as the activation function
        x = self.fc3(x)                        # No activation function at the output layer as this is a regression problem
        return x

### Activation function
- The purpose of an activation function is to introduce **non-linearity** into the output of a neuron.
- Following are few popular activation functions for hidden layers:
    - Rectified linear activation function or RELU
    - Sigmoid
    - TanH
- Following are few popular activation functions for output layers:
    - Linear
    - Sigmoid
    - Softmax

<table>
    <tr>
      <td>
      <img src='Linear.png' width=300>
      </td>
      <td>
      <img src='RELU.png' width=300>
      </td>
     </tr>
    <tr>
      <td>
      Linear
      </td>
      <td>
      RELU
      </td>
     </tr>  
    <tr>
      <td>
      <img src='Sigmoid.png' width=300>
      </td>
      <td>
      <img src='TanH.png' width=300>
      </td>
     </tr>
    <tr>
      <td>
      Sigmoid
      </td>
      <td>
      TanH
      </td>
     </tr>     
</table>

In [15]:
# Initialize the network now
network = RegressorNeuralNet()

# Define loss function and the optimizer
criterion = nn.MSELoss()
optimizer = RMSprop(network.parameters())

## Calculating loss for regression problems
### We need to know how close are the predictions of the network to the actual targets
- Mean Absolute Error
- Mean Squared Error, this is in the square of the target value, so not very intuititive
- Root Mean Squared Error, this is in the same unit as the target value

In [16]:
# Print the network
network

RegressorNeuralNet(
  (fc1): Linear(in_features=5, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=1, bias=True)
)

<img src="regressor_neural_network.png" align="left" width="500" />

In [17]:
# Send the network to GPU as well, if available
network.to(device)

RegressorNeuralNet(
  (fc1): Linear(in_features=5, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=1, bias=True)
)

In [18]:
# Next, we define the dataloader
train_data = TensorDataset(x_train, y_train) 
train_loader = DataLoader(train_data, batch_size = 100, shuffle = True)

In [19]:
# Define the number of EPOCHS we want to train
EPOCHS = 20

In [20]:
# Start the training
for epoch in range(EPOCHS):
    for batch_idx, (data, target) in enumerate(train_loader): 
        optimizer.zero_grad()  # This is very important to zero out the gradient before every iterations
        output = network(data) # This is where we are performing the forward propagation
        loss = criterion(output, target) # This is where we are calculating the loss
        loss.backward() # This is where we are performing the backward propagation
        optimizer.step() # This is where we update the network parameters
    print("Epoch:", epoch+1, "\tLoss:", loss.item())

Epoch: 1 	Loss: 16448.1796875
Epoch: 2 	Loss: 12066.3642578125
Epoch: 3 	Loss: 7683.41943359375
Epoch: 4 	Loss: 4495.048828125
Epoch: 5 	Loss: 3641.96875
Epoch: 6 	Loss: 2589.5703125
Epoch: 7 	Loss: 2167.309814453125
Epoch: 8 	Loss: 1404.2508544921875
Epoch: 9 	Loss: 696.6170043945312
Epoch: 10 	Loss: 558.0582885742188
Epoch: 11 	Loss: 451.7222595214844
Epoch: 12 	Loss: 659.2296142578125
Epoch: 13 	Loss: 400.7658996582031
Epoch: 14 	Loss: 369.77789306640625
Epoch: 15 	Loss: 264.9070129394531
Epoch: 16 	Loss: 232.63015747070312
Epoch: 17 	Loss: 291.1038818359375
Epoch: 18 	Loss: 217.6041717529297
Epoch: 19 	Loss: 214.98260498046875
Epoch: 20 	Loss: 201.41847229003906


In [21]:
# Evaluate neural network 
with torch.no_grad(): # When we evaluate the network, we dont track the gradients
    output = network(x_test) # We derive the output
    test_loss = float(criterion(output, y_test)) # We calculate the loss
    print("Test MSE:", test_loss)

Test MSE: 222.51780700683594
