## References
- Machine Learning with Python Cookbook, 2nd Edition, Kyle Gallatin, Chris Albon, O'Reilly Media, Inc.
- http://alexlenail.me/NN-SVG/index.html
- https://machinelearningmastery.com/choose-an-activation-function-for-deep-learning/

In [20]:
# We import all the necessary libraries at the beginning
import torch
import torch.nn as nn 
import numpy as np 
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import RMSprop 
from sklearn.datasets import make_regression 
from sklearn.model_selection import train_test_split

In [24]:
print(torch.__version__)

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

# Additional Info when using CUDA
if device.type == 'cuda':
    print(torch.cuda.is_available())
    print(torch.cuda.device_count())
    print(torch.cuda.device(0))
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

2.0.1+cu118
Using device: cuda

True
1
<torch.cuda.device object at 0x00000276264AA520>
NVIDIA GeForce RTX 3060 Laptop GPU
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


### What is CUDA?
CUDA is a programming model and computing toolkit developed by NVIDIA. It enables you to perform compute-intensive operations faster by parallelizing tasks across GPUs. CUDA is the dominant API used for deep learning although other options are available, such as OpenCL. PyTorch provides support for CUDA in the torch.

In [2]:
# We generate a sample regression dataset
features, target = make_regression(n_features = 5, n_samples = 1000)
features_train, features_test, target_train, target_test = train_test_split(features, target, test_size = 0.1, random_state = 1)

In [3]:
# Print the type and shape of the dataset
print('Training feature set -> type - {}, shape - {}'.format(type(features_train), features_train.shape))
print(features_train[0])

Training feature set -> type - <class 'numpy.ndarray'>, shape - (900, 5)
[-0.94676239 -1.215222    0.01770675  0.37488315  0.56145877]


In [4]:
# Print the type and shape of the dataset
print('Test feature set -> type - {}, shape - {}'.format(type(features_test), features_test.shape))
print(features_test[0])

Test feature set -> type - <class 'numpy.ndarray'>, shape - (100, 5)
[ 0.71654831 -1.62444604 -0.42186141  1.59541193 -2.00873225]


In [5]:
# Print the type and shape of the dataset
print('Training target set -> type - {}, shape - {}'.format(type(target_train), target_train.shape))
print(target_train[0])

Training target set -> type - <class 'numpy.ndarray'>, shape - (900,)
-138.52135262757946


In [6]:
# Print the type and shape of the dataset
print('Test target set -> type - {}, shape - {}'.format(type(target_test), target_test.shape))
print(target_test[0])

Test target set -> type - <class 'numpy.ndarray'>, shape - (100,)
-15.527435215048516


In [8]:
# To work with Pytorch, we need to convert the ndarrays to tensors
x_train = torch.from_numpy(features_train).float() 
y_train = torch.from_numpy(target_train).float().view(-1,1) 
x_test = torch.from_numpy(features_test).float()
y_test = torch.from_numpy(target_test).float().view(-1,1)

In [26]:
# Print the type and shape of the converted dataset
print('Training feature set -> type - {}, shape - {}, is CUDA - {}'.format(type(x_train), x_train.shape, x_train.is_cuda))
print(x_train[0])

Training feature set -> type - <class 'torch.Tensor'>, shape - torch.Size([900, 5]), is CUDA - False
tensor([-0.9468, -1.2152,  0.0177,  0.3749,  0.5615])


In [27]:
# Print the type and shape of the converted dataset
print('Test feature set -> type - {}, shape - {}, is CUDA - {}'.format(type(x_test), x_test.shape, x_test.is_cuda))
print(x_test[0])

Test feature set -> type - <class 'torch.Tensor'>, shape - torch.Size([100, 5]), is CUDA - False
tensor([ 0.7165, -1.6244, -0.4219,  1.5954, -2.0087])


In [28]:
# Print the type and shape of the converted dataset
print('Train target set -> type - {}, shape - {}, is CUDA - {}'.format(type(y_train), y_train.shape, y_train.is_cuda))
print(y_train[0])

Train target set -> type - <class 'torch.Tensor'>, shape - torch.Size([900, 1]), is CUDA - False
tensor([-138.5213])


In [29]:
# Print the type and shape of the converted dataset
print('Test target set -> type - {}, shape - {}, is CUDA - {}'.format(type(y_test), y_test.shape, y_test.is_cuda))
print(y_test[0])

Test target set -> type - <class 'torch.Tensor'>, shape - torch.Size([100, 1]), is CUDA - False
tensor([-15.5274])


In [32]:
# Now, send all the datasets to GPU, if available
x_train = x_train.to(device)
x_test = x_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)

# Print the type and shape of the converted dataset
print('Test target set -> type - {}, shape - {}, is CUDA - {}'.format(type(y_test), y_test.shape, y_test.is_cuda))
print(y_test[0])

Test target set -> type - <class 'torch.Tensor'>, shape - torch.Size([100, 1]), is CUDA - True
tensor([-15.5274], device='cuda:0')


In [13]:
# Define the Neural Netowrk to solve this regression problem
class RegressorNeuralNet(nn.Module): 
    def __init__(self): 
        super(RegressorNeuralNet, self).__init__()
        self.fc1 = nn.Linear(5, 10) # in_features=5, out_features=10
        self.fc2 = nn.Linear(10, 10) # in_features=10, out_features=10
        self.fc3 = nn.Linear(10, 1)  # in_features=10, out_features=1
        
    def forward(self, x): 
        x = nn.functional.relu(self.fc1(x))    # We used RELU as the activation function
        x = nn.functional.relu(self.fc2(x))    # We used RELU as the activation function
        x = self.fc3(x)                        # No activation function at the output layer as this is a regression problem
        return x

### Activation function
- The purpose of an activation function is to introduce **non-linearity** into the output of a neuron.
- Following are few popular activation functions for hidden layers:
    - Rectified linear activation function or RELU
    - Sigmoid
    - TanH
- Following are few popular activation functions for output layers:
    - Linear
    - Sigmoid
    - Softmax

<table>
    <tr>
      <td>
      <img src='Linear.png' width=300>
      </td>
      <td>
      <img src='RELU.png' width=300>
      </td>
     </tr>
    <tr>
      <td>
      Linear
      </td>
      <td>
      RELU
      </td>
     </tr>  
    <tr>
      <td>
      <img src='Sigmoid.png' width=300>
      </td>
      <td>
      <img src='TanH.png' width=300>
      </td>
     </tr>
    <tr>
      <td>
      Sigmoid
      </td>
      <td>
      TanH
      </td>
     </tr>     
</table>

In [14]:
# Initialize the network now
network = RegressorNeuralNet()

# Define loss function and the optimizer
criterion = nn.MSELoss()
optimizer = RMSprop(network.parameters())

## Calculating loss for regression problems
### We need to know how close are the predictions of the network to the actual targets
- Mean Absolute Error
- Mean Squared Error, this is in the square of the target value, so not very intuititive
- Root Mean Squared Error, this is in the same unit as the target value

In [15]:
# Print the network
network

RegressorNeuralNet(
  (fc1): Linear(in_features=5, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=1, bias=True)
)

<img src="regressor_neural_network.png" align="left" width="500" />

In [33]:
# Send the network to GPU as well, if available
network.to(device)

RegressorNeuralNet(
  (fc1): Linear(in_features=5, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=1, bias=True)
)

In [16]:
# Next, we define the dataloader
train_data = TensorDataset(x_train, y_train) 
train_loader = DataLoader(train_data, batch_size = 100, shuffle = True)

In [17]:
# Define the number of EPOCHS we want to train
EPOCHS = 20

In [18]:
# Start the training
for epoch in range(EPOCHS):
    for batch_idx, (data, target) in enumerate(train_loader): 
        optimizer.zero_grad()  # This is very important to zero out the gradient before every iterations
        output = network(data) # This is where we are performing the forward propagation
        loss = criterion(output, target) # This is where we are calculating the loss
        loss.backward() # This is where we are performing the backward propagation
        optimizer.step() # This is where we update the network parameters
    print("Epoch:", epoch+1, "\tLoss:", loss.item())

Epoch: 1 	Loss: 19860.64453125
Epoch: 2 	Loss: 7681.814453125
Epoch: 3 	Loss: 3047.110595703125
Epoch: 4 	Loss: 3448.823974609375
Epoch: 5 	Loss: 1567.0345458984375
Epoch: 6 	Loss: 659.45068359375
Epoch: 7 	Loss: 664.176513671875
Epoch: 8 	Loss: 405.0368347167969
Epoch: 9 	Loss: 316.1385192871094
Epoch: 10 	Loss: 363.69683837890625
Epoch: 11 	Loss: 296.521484375
Epoch: 12 	Loss: 216.78538513183594
Epoch: 13 	Loss: 205.28973388671875
Epoch: 14 	Loss: 282.2417907714844
Epoch: 15 	Loss: 176.0799560546875
Epoch: 16 	Loss: 181.28224182128906
Epoch: 17 	Loss: 204.53782653808594
Epoch: 18 	Loss: 184.54037475585938
Epoch: 19 	Loss: 184.8148651123047
Epoch: 20 	Loss: 134.3917999267578


In [19]:
# Evaluate neural network 
with torch.no_grad(): # When we evaluate the network, we dont track the gradients
    output = network(x_test) # We derive the output
    test_loss = float(criterion(output, y_test)) # We calculate the loss
    print("Test MSE:", test_loss)

Test MSE: 188.34962463378906
