# $\color{ForestGreen}{\text{Regression through PyTorch}}$

In [None]:
# Importing basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from tqdm import tqdm
import time

# Importing some basic modules in torch
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import BatchSampler, Dataset, DataLoader

In deep learning, GPUs are commonly used to increase computational power and reduce training time. The following function recursively moves data (a tensor, or a list, tuple, set, or dictionary containing tensors) to a given device, e.g. from the CPU to a GPU. Today, however, we will just use the CPU.

In [None]:
def moveTo(obj, device):
    """Recursively move ``obj`` (or everything inside it) to ``device``.

    Lists, tuples, sets and dicts are rebuilt with every element (and, for
    dicts, every key and value) moved in turn.  Any other object exposing a
    ``to`` attribute is moved with ``obj.to(device)``; everything else is
    returned unchanged.

    Args:
        obj: Object or (possibly nested) container to move.
        device: Target device, forwarded as-is to ``.to(...)``.

    Returns:
        The moved object.  Containers come back as a new list, tuple, set
        or dict respectively.
    """
    if isinstance(obj, list):
        return [moveTo(item, device) for item in obj]
    if isinstance(obj, tuple):
        return tuple(moveTo(item, device) for item in obj)
    if isinstance(obj, set):
        return {moveTo(item, device) for item in obj}
    if isinstance(obj, dict):
        return {moveTo(k, device): moveTo(v, device) for k, v in obj.items()}
    if hasattr(obj, "to"):
        return obj.to(device)
    # Plain Python values (numbers, strings, ...) have nothing to move.
    return obj

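The following function runs one epoch over a data loader: it computes the loss on every batch and, when the model is in training mode, back-propagates the gradients and updates the model's parameters. It also records the mean loss and any requested scores for that epoch.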

In [None]:
def run_epoch(model, optimizer, data_loader, loss_func, device, results, score_funcs, prefix="", desc=None):
    """Run one pass over ``data_loader`` and record the loss and scores.

    For every batch the inputs and labels are moved to ``device``, the model
    is evaluated and the loss computed.  If ``model.training`` is true the
    loss is back-propagated and the optimizer takes a step; otherwise the
    parameters are left untouched.

    Args:
        model: Callable model exposing a ``training`` flag.
        optimizer: Optimizer stepped after each batch in training mode.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_func: Callable ``loss_func(y_hat, labels)`` returning the loss.
        device: Device the batches are moved to.
        results: Dict of lists, updated in place.  It must already contain
            the key ``prefix + " loss"`` and one key ``prefix + " " + name``
            for every entry of ``score_funcs``.
        score_funcs: Mapping from score name to ``score_func(y_true, y_pred)``.
            ``None`` is treated as "no extra scores".
        prefix: Prefix of the ``results`` keys written by this call.
        desc: Description shown on the progress bar.

    Returns:
        Wall-clock time in seconds spent iterating over the batches.
    """
    # Accept None so callers without extra metrics do not crash on len()/.items().
    if score_funcs is None:
        score_funcs = {}

    running_loss = []
    y_true = []
    y_pred = []
    start = time.time()
    for inputs, labels in tqdm(data_loader, desc=desc, leave=False):
        # Move the batch to the device we are using.
        inputs = moveTo(inputs, device)
        labels = moveTo(labels, device)

        y_hat = model(inputs)

        # Compute loss.
        loss = loss_func(y_hat, labels)

        if model.training:
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        # Keep the scalar loss of this batch for the epoch average.
        running_loss.append(loss.item())

        if score_funcs and isinstance(labels, torch.Tensor):
            # Bring labels and predictions back to the CPU as plain lists
            # so they can be accumulated across batches.
            y_true.extend(labels.detach().cpu().numpy().tolist())
            y_pred.extend(y_hat.detach().cpu().numpy().tolist())
    end = time.time()

    y_pred = np.asarray(y_pred)

    results[prefix + " loss"].append(np.mean(running_loss))
    for name, score_func in score_funcs.items():
        try:
            # Regression: score the raw model outputs.
            score = score_func(y_true, y_pred)
        except Exception:
            # Classification: the metric rejected raw outputs, so score the
            # arg-max class instead.  ``Exception`` (not a bare ``except``)
            # lets KeyboardInterrupt/SystemExit propagate.
            score = score_func(y_true, np.argmax(y_pred, axis=1))
        results[prefix + " " + name].append(score)
    return end - start  # time spent on epoch

The following function trains a given model for a specified number of epochs and calculates the requested performance metrics after every epoch. It collects the metrics of all epochs in a dataframe, with one row per epoch.

In [None]:
def train_simple_network_alt(model, optimizer, loss_func, eta, train_loader, test_loader=None, score_funcs=None,
                         epochs=50, device="cpu"):
    """Train ``model`` for ``epochs`` epochs and collect per-epoch metrics.

    Each epoch runs one training pass over ``train_loader`` and, if a
    ``test_loader`` is given, one evaluation pass (model in eval mode, under
    ``torch.no_grad()``) over it.

    Args:
        model: The model to train; moved to ``device`` before training.
        optimizer: Optimizer used to update the model's parameters.
        loss_func: Callable ``loss_func(y_hat, labels)`` returning the loss.
        eta: Learning rate.  Not used inside this function (the optimizer
            is passed in already constructed); kept so existing calls
            keep working.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Optional iterable of evaluation batches.
        score_funcs: Optional mapping from score name to
            ``score_func(y_true, y_pred)``.  ``None`` means no extra scores.
        epochs: Number of training epochs.
        device: Device on which the model and the batches are placed.

    Returns:
        A ``pandas.DataFrame`` with one row per epoch and the columns
        ``"epoch"``, ``"total time"`` (cumulative training time),
        ``"train loss"``, plus ``"test loss"`` and ``"train <name>"`` /
        ``"test <name>"`` for each score when applicable.
    """
    # Normalize None to an empty mapping so the per-epoch scoring code
    # always receives a dict.
    if score_funcs is None:
        score_funcs = {}

    to_track = ["epoch", "total time", "train loss"]
    if test_loader is not None:
        to_track.append("test loss")
    for eval_score in score_funcs:
        to_track.append("train " + eval_score)
        if test_loader is not None:
            to_track.append("test " + eval_score)

    # Every tracked quantity starts as an empty list, filled once per epoch.
    results = {item: [] for item in to_track}
    total_train_time = 0  # How long have we spent in the training loop?

    # Place the model on the correct compute resource (CPU or GPU).
    model.to(device)
    for epoch in tqdm(range(1, epochs + 1), desc="Epoch"):
        model = model.train()  # Put our model in training mode

        total_train_time += run_epoch(model, optimizer, train_loader, loss_func, device, results, score_funcs,
                                      prefix="train", desc="Training")
        results["total time"].append(total_train_time)
        results["epoch"].append(epoch)

        if test_loader is not None:
            model = model.eval()
            # No gradients are needed while evaluating.
            with torch.no_grad():
                run_epoch(model, optimizer, test_loader, loss_func, device, results, score_funcs, prefix="test",
                          desc="Testing")

    return pd.DataFrame.from_dict(results)

## $\color{ForestGreen}{\text{A Toy Dataset for Regression}}$

In [None]:
# Toy regression data: 300 evenly spaced inputs on [0, 30]
X = np.linspace(0, 30, num=300)

# Target = downward linear trend + cosine wave + noise drawn from a normal distribution
noise = np.random.normal(0, 1, size=X.shape)
y = -X + np.cos(X)*3 + noise

# Scatter plot of the raw data
sns.scatterplot(x=X, y=y)
fig = plt.gcf()
fig.set_size_inches(7, 5)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Graph of X vs y')
plt.show()

The following class takes the feature and the target as numpy arrays, reshapes
them into column vectors, and returns each sample as a pair of `torch.float32` tensors.

In [None]:
class RegressionDataset(Dataset):
    """Single-feature regression dataset backed by two NumPy arrays.

    Both arrays are reshaped to column vectors of shape (N, 1) on
    construction; a sample is converted to ``torch.float32`` tensors each
    time it is accessed.
    """

    def __init__(self, X, y):
        # -1 lets NumPy infer the number of rows.
        self.X = X.reshape(-1, 1)
        self.y = y.reshape(-1, 1)

    def __len__(self):
        n_samples = self.X.shape[0]
        return n_samples

    def __getitem__(self, index):
        features = torch.tensor(self.X[index, :], dtype=torch.float32)
        target = torch.tensor(self.y[index], dtype=torch.float32)
        return features, target

In [None]:
# Wrap the arrays in a Dataset, then use 'torch.utils.data.random_split'
# to randomly hold out 50 samples for testing; the rest is used for training
data = RegressionDataset(X, y)

n_test = 50
train_data, test_data = torch.utils.data.random_split(data, (len(data) - n_test, n_test))

In [None]:
# Batch iterators over the two subsets. No batch_size is passed, so
# DataLoader's default batch size applies. Only the training data is shuffled.
train_loader = DataLoader(dataset=train_data, shuffle=True)
test_loader = DataLoader(dataset=test_data, shuffle=False)

In [None]:
# Model 1: linear regression expressed as a one-layer network
n_features = 1   # Number of input features
n_targets = 1    # Number of regression targets
eta = 0.001      # Learning rate

# A single linear layer straight from the features to the targets (no hidden layers)
output_layer = nn.Linear(in_features=n_features, out_features=n_targets)
model_1 = nn.Sequential(output_layer)

# Mean squared error as the loss, Adam (a very common optimizer in DL) for the updates
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(params=model_1.parameters(), lr=eta)

In [None]:
# Train Model 1 for 25 epochs and collect the per-epoch metrics.
# The R^2 score is registered under the name 'accuracy', so it appears in the
# results as the columns 'train accuracy' and 'test accuracy'.
fc_1_results = train_simple_network_alt(
    model=model_1,
    optimizer=optimizer,
    loss_func=loss_func,
    eta=eta,
    train_loader=train_loader,
    test_loader=test_loader,
    score_funcs={'accuracy': r2_score},
    epochs=25,
)

In [None]:
# Display the per-epoch metrics table collected while training Model 1
fc_1_results

In [None]:
# Train and test loss of Model 1 per epoch (the first row of the results is skipped)
loss_curves = fc_1_results[1:]
sns.lineplot(data=loss_curves, x='epoch', y='train loss', label='train loss')
sns.lineplot(data=loss_curves, x='epoch', y='test loss', label='test loss')
fig = plt.gcf()
fig.set_size_inches(7, 5)
plt.title('MSE Loss Function of Model 1')
plt.show()

In [None]:
# Train and test R^2 score of Model 1 per epoch (the first six rows of the results are skipped)
r2_curves = fc_1_results[6:]
sns.lineplot(data=r2_curves, x='epoch', y='train accuracy', label=r'train $R^2$')
sns.lineplot(data=r2_curves, x='epoch', y='test accuracy', label=r'test $R^2$')
fig = plt.gcf()
fig.set_size_inches(7, 5)
plt.title(r'$R^2$ Score of Model 1')
plt.show()

In [None]:
# Model 1 (linear regression through a neural network): predict on every input and plot the fit
x_grid = torch.tensor(X.reshape(-1, 1), dtype=torch.float32)
with torch.no_grad():   # inference only, no gradient tracking
    Y_pred = model_1(x_grid).cpu().numpy()   # Shape of (N, 1)

# numpy.ravel returns a contiguous flattened array, i.e. (N, 1) -> (N,)
fitted = Y_pred.ravel()

sns.scatterplot(x=X, y=y, color='blue', label='Data')       # The data
sns.lineplot(x=X, y=fitted, color='red', label='Model')     # What our model learned
fig = plt.gcf()
fig.set_size_inches(7, 5)
plt.title('Single Linear Layer Prediction')
plt.show()

Now let's stack several linear layers (three hidden layers followed by an output layer), still without any activation function!

In [None]:
# Model 2: several linear layers stacked without any activation in between
n_features = 1  # Number of input features
n_targets = 1   # Number of regression targets
n_neurons = 32  # Number of neurons in each hidden layer
eta = 0.001     # Learning rate

linear_stack = [
    nn.Linear(n_features, n_neurons),   # From input layer to hidden layer 1
    nn.Linear(n_neurons, n_neurons),    # From hidden layer 1 to hidden layer 2
    nn.Linear(n_neurons, n_neurons),    # From hidden layer 2 to hidden layer 3
    nn.Linear(n_neurons, n_targets),    # From hidden layer 3 to output layer
]
model_2 = nn.Sequential(*linear_stack)

# Mean squared error as the loss, Adam for the parameter updates
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(params=model_2.parameters(), lr=eta)

In [None]:
# Train Model 2 for 25 epochs and collect the per-epoch metrics
# (the R^2 score is stored under the name 'accuracy')
fc_2_results = train_simple_network_alt(
    model=model_2,
    optimizer=optimizer,
    loss_func=loss_func,
    eta=eta,
    train_loader=train_loader,
    test_loader=test_loader,
    score_funcs={'accuracy': r2_score},
    epochs=25,
)

In [None]:
# Display the per-epoch metrics table collected while training Model 2

fc_2_results

In [None]:
# Model 2 (linear regression through a neural network): predict on every input and plot the fit
x_grid = torch.tensor(X.reshape(-1, 1), dtype=torch.float32)
with torch.no_grad():   # inference only, no gradient tracking
    Y_pred = model_2(x_grid).cpu().numpy()   # Shape of (N, 1)

fitted = Y_pred.ravel()   # flatten (N, 1) -> (N,) for plotting

sns.scatterplot(x=X, y=y, color='blue', label='Data')       # The data
sns.lineplot(x=X, y=fitted, color='red', label='Model')     # What our model learned
fig = plt.gcf()
fig.set_size_inches(7, 5)
plt.title('Multiple Linear Layers Prediction')
plt.show()

Now, let's add nonlinearity to our network!

In [None]:
# Model 3: the same depth as before, but with a tanh activation after every hidden layer
n_features = 1  # Number of input features
n_targets = 1   # Number of regression targets
n_neurons = 64  # Number of neurons in each hidden layer
eta = 0.001     # Learning rate

tanh_stack = [
    nn.Linear(n_features, n_neurons),   # From input layer to hidden layer 1
    nn.Tanh(),                          # tanh activation
    nn.Linear(n_neurons, n_neurons),    # From hidden layer 1 to hidden layer 2
    nn.Tanh(),                          # tanh activation
    nn.Linear(n_neurons, n_neurons),    # From hidden layer 2 to hidden layer 3
    nn.Tanh(),                          # tanh activation
    nn.Linear(n_neurons, n_targets),    # From hidden layer 3 to output layer
]
model_3 = nn.Sequential(*tanh_stack)

# Mean squared error as the loss, Adam for the parameter updates
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(params=model_3.parameters(), lr=eta)

In [None]:
# Train Model 3 for 35 epochs and collect the per-epoch metrics
# (the R^2 score is stored under the name 'accuracy')
fc_3_results = train_simple_network_alt(
    model=model_3,
    optimizer=optimizer,
    loss_func=loss_func,
    eta=eta,
    train_loader=train_loader,
    test_loader=test_loader,
    score_funcs={'accuracy': r2_score},
    epochs=35,
)

In [None]:
# Model 3 (neural network with activation functions): predict on every input and plot the fit
x_grid = torch.tensor(X.reshape(-1, 1), dtype=torch.float32)
with torch.no_grad():   # inference only, no gradient tracking
    Y_pred = model_3(x_grid).cpu().numpy()   # Shape of (N, 1)

fitted = Y_pred.ravel()   # flatten (N, 1) -> (N,) for plotting

sns.scatterplot(x=X, y=y, color='blue', label='Data')       # The data
sns.lineplot(x=X, y=fitted, color='red', label='Model')     # What our model learned
fig = plt.gcf()
fig.set_size_inches(7, 5)
plt.title('Predictions in the Presence of Activations')
plt.show()

In [None]:
# Display the metrics of the last ten epochs of Model 3

fc_3_results[-10:]

In [None]:
# Train and test R^2 score of Model 3 per epoch (stored in the 'accuracy' columns);
# the first row of the results is skipped
r2_curves = fc_3_results[1:]
sns.lineplot(data=r2_curves, x='epoch', y='train accuracy', label=r'train $R^2$')
sns.lineplot(data=r2_curves, x='epoch', y='test accuracy', label=r'test $R^2$')
fig = plt.gcf()
fig.set_size_inches(7, 5)
plt.title('Accuracy of Model 3')
plt.show()