# Metrics

In [None]:
# load libraries
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from scipy.stats import pearsonr

In [None]:
# define function to convert to numpy array
def to_np_array(array):
    """Coerce *array* to a squeezed NumPy array, passing ``None`` through.

    Args:
        array: ``None``, a ``numpy.ndarray``, or any object accepted by
            ``np.array``.

    Returns:
        ``None`` when *array* is ``None``; otherwise the (possibly converted)
        array after ``squeeze()``. Because every length-1 axis is removed, a
        single-element input comes back as a 0-d array.
    """
    # None means "not provided" and is handed back untouched
    if array is None:
        return None

    # only build a new array when the input is not one already
    converted = array if isinstance(array, np.ndarray) else np.array(array)
    return converted.squeeze()

In [None]:
# define baseline metric class
class Metric:
    """Base class for metrics: normalises inputs, then defers to ``forward``.

    Subclasses override ``forward``; calling the instance converts each input
    with ``to_np_array`` first.
    """

    def __init__(self, plot_name, split):
        """Store the plot name and the data split on the instance."""
        self.plot_name, self.split = plot_name, split

    def __call__(self, y_true, y_pred=None, p_pred=None, plot=False):
        """Convert the inputs to NumPy arrays and evaluate the metric.

        Args:
            y_true: true values.
            y_pred: predicted values (optional).
            p_pred: optional third input; forwarded unchanged in meaning.
            plot: flag passed straight through to ``forward``.

        Returns:
            Whatever the subclass ``forward`` returns.
        """
        # None inputs stay None; everything else becomes a squeezed array
        arrays = [to_np_array(item) for item in (y_true, y_pred, p_pred)]
        return self.forward(*arrays, plot)

    def forward(self, y_true, y_pred=None, p_pred=None, plot=False):
        """Compute the metric; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("This is the base class for metrics")

In [None]:
# class to calculate and plot Pearson correlation and R-squared
class CorrAndRSquared(Metric):
    """Compute R-squared and the Pearson correlation, optionally plotting them."""

    def forward(self, y_true, y_pred=None, p_pred=None, plot=False):
        """Return R-squared and the Pearson correlation with its p-value.

        Args:
            y_true: true values.
            y_pred: predicted values.
            p_pred: unused by this metric.
            plot: when truthy, draw and save the true-vs-predicted scatter plot.

        Returns:
            A tuple ``(r2, (pearson, pvalue))``.
        """
        # calculate R-squared
        r2 = r2_score(y_true, y_pred)

        # calculate Pearson and p-value
        pearson, pvalue = pearsonr(y_true, y_pred)

        if plot:
            # plot true vs predicted values
            self._plot_scatter(y_true, y_pred, r2, pearson, pvalue)

        return r2, (pearson, pvalue)

    def _plot_scatter(self, y_true, y_pred, r2, pearson, pvalue):
        """Draw predicted vs true values, save the figure, then display it.

        The image is written to the project plots folder as
        ``<plot_name>_<split>_r2.png``.
        """
        # NOTE: these rcParams assignments are global and persist after this call
        plt.rcParams['axes.labelweight'] = 'bold'
        plt.rcParams['axes.titleweight'] = 'bold'

        fig = plt.figure(figsize=(8, 6))

        plt.scatter(y_pred, y_true, color='black', label='Actual Performance')
        # identity line: where the points would fall if predictions were perfect
        plt.plot(y_true, y_true, color='red', linestyle='--',
                 label='Theoretical Perfect Performance')

        plt.xlabel('Predicted Value', size=12)
        plt.ylabel('True Value', size=12)
        plt.title('True Values vs Predicted Values', size=13)

        # NOTE(review): the annotation positions (110, 15) and (110, 5) are
        # fixed values - confirm they suit the range of the plotted data
        plt.text(110, 15, f'R-Squared = {r2:.2f}', fontweight='bold')
        if pvalue < 0.001:
            # very small p-values are reported as a bound instead of 0.0000
            pearson_label = f'Pearson Correlation = {pearson:.2f} (p < 0.001)'
        else:
            pearson_label = f'Pearson Correlation = {pearson:.2f} (p = {pvalue:.4f})'
        plt.text(110, 5, pearson_label, fontweight='bold')

        plt.legend(loc='upper left')

        # save BEFORE showing: notebook backends close the figure on show(),
        # so saving afterwards would write an empty image
        fig.savefig('/content/drive/MyDrive/BINF_4008_Final_Project/Plots/' + self.plot_name + '_' + self.split + '_r2.png')

        plt.show()

        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

In [None]:
# class to calculate MSE
class MSE(Metric):
    """Mean squared error between true and predicted values."""

    def forward(self, y_true, y_pred=None, p_pred=None, plot=False):
        """Return ``mean_squared_error(y_true, y_pred)``.

        ``p_pred`` and ``plot`` are accepted for interface parity and unused.
        """
        return mean_squared_error(y_true, y_pred)

In [None]:
# class to calculate MAE
class MAE(Metric):
    """Mean absolute error between true and predicted values."""

    def forward(self, y_true, y_pred=None, p_pred=None, plot=False):
        """Return ``mean_absolute_error(y_true, y_pred)``.

        ``p_pred`` and ``plot`` are accepted for interface parity and unused.
        """
        return mean_absolute_error(y_true, y_pred)

In [None]:
# class to calculate MAPE
class MAPE(Metric):
    """Mean absolute percentage error between true and predicted values."""

    def forward(self, y_true, y_pred=None, p_pred=None, plot=False):
        """Return the mean of ``|(y_true - y_pred) / y_true|`` times 100.

        ``p_pred`` and ``plot`` are accepted for interface parity and unused.
        The division by ``y_true`` is not guarded, so zeros in ``y_true`` are
        divided by as-is.
        """
        # element-wise error relative to the true value
        relative_error = (y_true - y_pred) / y_true
        mean_abs_relative = np.mean(np.abs(relative_error))
        # scale the fraction to a percentage
        return mean_abs_relative * 100

In [None]:
# class to plot the train and validation loss
class PlotLoss:
    """Plot train and validation loss per epoch and save the figure."""

    def __init__(self, plot_name):
        """Store the plot name used to build the output file name."""
        self.plot_name = plot_name

    def __call__(self, train_losses, val_losses):
        """Convert both loss histories to arrays and draw the plot.

        Args:
            train_losses: per-epoch training losses.
            val_losses: per-epoch validation losses.
        """
        train_losses = to_np_array(train_losses)
        val_losses = to_np_array(val_losses)

        # to_np_array squeezes a one-epoch history down to a 0-d array, which
        # has no len(); restore one dimension so a single epoch still plots
        if train_losses is not None:
            train_losses = np.atleast_1d(train_losses)
        if val_losses is not None:
            val_losses = np.atleast_1d(val_losses)

        # run the plotting code
        self.forward(train_losses, val_losses, self.plot_name)

    def forward(self, train_losses, val_losses, plot_name):
        """Draw both loss curves, save ``<plot_name>_loss.png``, then display.

        Args:
            train_losses: sized sequence of training losses, one per epoch.
            val_losses: sized sequence of validation losses, one per epoch.
            plot_name: prefix of the saved file name.
        """
        n_epochs = len(train_losses)

        fig = plt.figure(figsize=(6, 4))

        plt.plot(range(n_epochs), train_losses, lw=2, color='navy', label='Train Loss')
        plt.plot(range(len(val_losses)), val_losses, lw=2, color='darkorange', label='Val Loss')

        plt.title('Loss Over Epochs')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')

        plt.legend(loc='lower right')

        # tick positions are 0-based indices; the labels show 1-based epochs
        plt.xticks(ticks=range(n_epochs), labels=range(1, n_epochs + 1))

        # save BEFORE showing: notebook backends close the figure on show(),
        # so saving afterwards would write an empty image
        fig.savefig('/content/drive/MyDrive/BINF_4008_Final_Project/Plots/' + plot_name + '_loss.png')

        plt.show()

        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

In [None]:
# define class for plotting the metrics across epochs
class PlotMetrics:
    """Base class for plotting a metric across epochs for train and validation.

    Subclasses override ``forward`` to draw the actual figure.
    """

    def __init__(self, plot_name):
        """Store the plot name used to build the output file name."""
        self.plot_name = plot_name

    def __call__(self, train_metric, val_metric):
        """Convert both metric histories to arrays and hand them to ``forward``.

        Args:
            train_metric: per-epoch metric values on the training set.
            val_metric: per-epoch metric values on the validation set.
        """
        train_metric = to_np_array(train_metric)
        val_metric = to_np_array(val_metric)

        # to_np_array squeezes a one-epoch history down to a 0-d array, which
        # has no len(); restore one dimension so a single epoch still plots
        if train_metric is not None:
            train_metric = np.atleast_1d(train_metric)
        if val_metric is not None:
            val_metric = np.atleast_1d(val_metric)

        # run the code to make the plots
        self.forward(train_metric, val_metric, self.plot_name)

    def forward(self, train_metric, val_metric, plot_name):
        """Placeholder implementation: only prints a notice."""
        print('This is the base for plotting.')

In [None]:
# class to plot R-squared over epochs
class PlotR2(PlotMetrics):
    """Plot R-squared over epochs for the train and validation sets."""

    def forward(self, train_metric, val_metric, plot_name):
        """Draw both R2 curves, save the figure, then display it.

        Args:
            train_metric: sized sequence of train R2 values, one per epoch.
            val_metric: sized sequence of validation R2 values, one per epoch.
            plot_name: prefix of the saved file, ``<plot_name>_R2_over_epochs.png``.
        """
        n_epochs = len(train_metric)

        fig = plt.figure(figsize=(6, 4))

        plt.plot(range(n_epochs), train_metric, lw=2, color='navy', label='Train R2')
        plt.plot(range(len(val_metric)), val_metric, lw=2, color='darkorange', label='Val R2')

        plt.title('R2 Over Epochs')
        plt.xlabel('Epoch')
        plt.ylabel('R2')

        plt.legend(loc='lower right')

        # tick positions are 0-based indices; the labels show 1-based epochs
        plt.xticks(ticks=range(n_epochs), labels=range(1, n_epochs + 1))

        # save BEFORE showing: notebook backends close the figure on show(),
        # so saving afterwards would write an empty image
        fig.savefig('/content/drive/MyDrive/BINF_4008_Final_Project/Plots/' + plot_name + '_R2_over_epochs.png')

        plt.show()

        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

In [None]:
# class to plot Pearson correlation coefficient over epochs
class PlotPearson(PlotMetrics):
    """Plot the Pearson correlation coefficient over epochs for train and validation."""

    def forward(self, train_metric, val_metric, plot_name):
        """Draw both Pearson curves, save the figure, then display it.

        Args:
            train_metric: sized sequence of train Pearson values, one per epoch.
            val_metric: sized sequence of validation Pearson values, one per epoch.
            plot_name: prefix of the saved file,
                ``<plot_name>_pearson_over_epochs.png``.
        """
        n_epochs = len(train_metric)

        fig = plt.figure(figsize=(6, 4))

        plt.plot(range(n_epochs), train_metric, lw=2, color='navy', label='Train Pearson')
        plt.plot(range(len(val_metric)), val_metric, lw=2, color='darkorange', label='Val Pearson')

        plt.title('Pearson Coefficient Over Epochs')
        plt.xlabel('Epoch')
        plt.ylabel('Pearson Coefficient')

        plt.legend(loc='lower right')

        # tick positions are 0-based indices; the labels show 1-based epochs
        plt.xticks(ticks=range(n_epochs), labels=range(1, n_epochs + 1))

        # save BEFORE showing: notebook backends close the figure on show(),
        # so saving afterwards would write an empty image
        fig.savefig('/content/drive/MyDrive/BINF_4008_Final_Project/Plots/' + plot_name + '_pearson_over_epochs.png')

        plt.show()

        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

In [None]:
# class to plot MSE over epochs
class PlotMSE(PlotMetrics):
    """Plot the mean squared error over epochs for train and validation."""

    def forward(self, train_metric, val_metric, plot_name):
        """Draw both MSE curves, save the figure, then display it.

        Args:
            train_metric: sized sequence of train MSE values, one per epoch.
            val_metric: sized sequence of validation MSE values, one per epoch.
            plot_name: prefix of the saved file, ``<plot_name>_MSE_over_epochs.png``.
        """
        n_epochs = len(train_metric)

        fig = plt.figure(figsize=(6, 4))

        plt.plot(range(n_epochs), train_metric, lw=2, color='navy', label='Train MSE')
        plt.plot(range(len(val_metric)), val_metric, lw=2, color='darkorange', label='Val MSE')

        plt.title('MSE Over Epochs')
        plt.xlabel('Epoch')
        plt.ylabel('MSE')

        plt.legend(loc='lower right')

        # tick positions are 0-based indices; the labels show 1-based epochs
        plt.xticks(ticks=range(n_epochs), labels=range(1, n_epochs + 1))

        # save BEFORE showing: notebook backends close the figure on show(),
        # so saving afterwards would write an empty image
        fig.savefig('/content/drive/MyDrive/BINF_4008_Final_Project/Plots/' + plot_name + '_MSE_over_epochs.png')

        plt.show()

        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

In [None]:
# class for plotting MAE over epochs
class PlotMAE(PlotMetrics):
    """Plot the mean absolute error over epochs for train and validation."""

    def forward(self, train_metric, val_metric, plot_name):
        """Draw both MAE curves, save the figure, then display it.

        Args:
            train_metric: sized sequence of train MAE values, one per epoch.
            val_metric: sized sequence of validation MAE values, one per epoch.
            plot_name: prefix of the saved file, ``<plot_name>_MAE_over_epochs.png``.
        """
        n_epochs = len(train_metric)

        fig = plt.figure(figsize=(6, 4))

        plt.plot(range(n_epochs), train_metric, lw=2, color='navy', label='Train MAE')
        plt.plot(range(len(val_metric)), val_metric, lw=2, color='darkorange', label='Val MAE')

        plt.title('MAE Over Epochs')
        plt.xlabel('Epoch')
        plt.ylabel('MAE')

        plt.legend(loc='lower right')

        # tick positions are 0-based indices; the labels show 1-based epochs
        plt.xticks(ticks=range(n_epochs), labels=range(1, n_epochs + 1))

        # save BEFORE showing: notebook backends close the figure on show(),
        # so saving afterwards would write an empty image
        fig.savefig('/content/drive/MyDrive/BINF_4008_Final_Project/Plots/' + plot_name + '_MAE_over_epochs.png')

        plt.show()

        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

In [None]:
# class to plot MAPE over epochs
class PlotMAPE(PlotMetrics):
    """Plot the mean absolute percentage error over epochs for train and validation."""

    def forward(self, train_metric, val_metric, plot_name):
        """Draw both MAPE curves, save the figure, then display it.

        Args:
            train_metric: sized sequence of train MAPE values, one per epoch.
            val_metric: sized sequence of validation MAPE values, one per epoch.
            plot_name: prefix of the saved file, ``<plot_name>_MAPE_over_epochs.png``.
        """
        n_epochs = len(train_metric)

        fig = plt.figure(figsize=(6, 4))

        plt.plot(range(n_epochs), train_metric, lw=2, color='navy', label='Train MAPE')
        plt.plot(range(len(val_metric)), val_metric, lw=2, color='darkorange', label='Val MAPE')

        plt.title('MAPE Over Epochs')
        plt.xlabel('Epoch')
        plt.ylabel('MAPE')

        plt.legend(loc='lower right')

        # tick positions are 0-based indices; the labels show 1-based epochs
        plt.xticks(ticks=range(n_epochs), labels=range(1, n_epochs + 1))

        # save BEFORE showing: notebook backends close the figure on show(),
        # so saving afterwards would write an empty image
        fig.savefig('/content/drive/MyDrive/BINF_4008_Final_Project/Plots/' + plot_name + '_MAPE_over_epochs.png')

        plt.show()

        # release the figure so repeated calls do not accumulate open figures
        plt.close(fig)