### Equations

(1) Hyperplane Centroid:

$ HC_{k} = {\bf w}^{T}{\bf x}- \frac{1}{ n_{k} } \sum\limits_{y_{i}=k} {\bf w}^{T}{\bf x_{i}}=0 $

(2) Hyperplane Centroid Loss:

$ HCL = \sum\limits_{i=1}^{k-1}=max(HC_{i} -HC_{i+1} + \delta,0) $

(3) Hyperplane Point Loss:

(4) $ HPL_{i}^{+}= max(f(x)-HC)-(HC_{+1}-HC)+\gamma (HC_{+1} - HC),0) $

$ =max(f(x_{i})-\gamma HC - (1-\gamma)HC_{+1},0)  $ 



(5)
$ HPL_{i}^{-}= max(\gamma HC - f(x_{i}) + (1-\gamma)HC_{-1},0)  $

(6)
$ HPL = \sum\limits_{x_{i}\in S} HPL_{i}^{+} + HPL_{i}^{-}$

(7) $ OHPL = \alpha HCL + HPL $

### Define Loss Functions

In [1]:
"""Metrics to assess performance on ordinal classification task given class prediction
   using hyper plane loss techniques 
"""

# Authors: Bob Vanderheyden <rvanderh@us.ibm.com>
#          Ying Xie <yxie2@kennesaw.edu>
#         
# Contributor: Shayan Shamskolahi

import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
import numpy as np

def hpall_mean_loss(y_true, y_pred, minlabel, maxlabel, margin=0.1, ordering_loss_weight=1):
    """ Evaluate the ordinal hyperplane ordering loss and point loss of the predictions y_pred\
        (using reduce mean).

        Parameters
        ----------
        y_true : array-like
        y_pred : array-like
        minlabel : integer
        maxlabel : integer
        margin : float
        ordering_loss_weight : float

        Returns
        -------
        loss: float
        A non-negative floating point value (best value is 0.0)
        
        Usage
        -------
        loss = hp_all_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
        print('Loss: ', loss.numpy()) # Loss: 0.7228571
        
        
        Usage with the `compile` API:
        
        ```python
        
        Example Keras wrapper for hp_all_loss:
        
        def get_ohpl_wrapper (min_label, max_label, margin, ordering_loss_weight):
            def ohpl(y_true, y_pred):
                return hpall_mean_loss(y_true, y_pred, min_label, max_label, margin, ordering_loss_weight)
            return ohpl

        loss = get_ohpl_wrapper(0,4,.3,0.1)
        
        model = tf.keras.Model(inputs, outputs)
        model.compile(loss=hp_all_loss, optimizer='adam', loss=ohpl_point_loss)
        ```
        
    """
    
    min_label = tf.constant(minlabel, dtype=tf.float32)
    max_label = tf.constant(maxlabel, dtype=tf.float32)
    margin = tf.constant(margin, dtype=tf.float32) # centroid margin
    ordering_loss_weight = tf.constant(ordering_loss_weight, dtype=tf.float32) 
    
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.dtypes.cast(y_true, y_pred.dtype)
    y_pred = tf.reshape(tf.transpose(y_pred),[-1,1])
    
    # OHPL ordering loss
    # one hot vector for y_true
    ords, idx = tf.unique(tf.reshape(y_true, [-1])) 
    num = tf.shape(ords)[0]
    y_true_1hot = tf.one_hot(idx, num)

    # mean distance for each class
    yO = tf.transpose(y_pred) @ y_true_1hot
    yc = tf.reduce_sum(y_true_1hot,0)
    class_mean = tf.divide(yO,yc)  

    # min. distance
    ords = tf.dtypes.cast(ords, tf.float32)
    ords0 = tf.reshape(ords, [-1,1])
    ords1 = tf.reshape(ords, [1,-1])
    
    min_distance = tf.subtract(ords0, ords1)
    # apply ReLU
    min_distance = tf.nn.relu (min_distance)
    
    # keeps min. distance
    keep = tf.minimum(min_distance,1)

    # distance to centroid     
    class_mean0 = tf.reshape(class_mean, [-1,1])
    class_mean1 = tf.reshape(class_mean, [1,-1])
    class_mean = tf.subtract(class_mean0, class_mean1)  
    # apply ReLU    
    class_mean = tf.nn.relu(class_mean)
    centroid_distance = tf.multiply(keep, class_mean)
    
    hp_ordering_loss = tf.subtract(min_distance,centroid_distance)
    # apply ReLU
    hp_ordering_loss = tf.nn.relu(hp_ordering_loss)
    hp_ordering_loss = tf.reduce_sum(hp_ordering_loss)

    
    # OHPL point loss
    # mean distance for each class
    yO = tf.transpose(y_pred) @ y_true_1hot
    yc = tf.reduce_sum(y_true_1hot,0)
    class_mean = tf.divide(yO,yc) 
 
    # mean by class
    class_mean = tf.reshape(class_mean,[-1,1])
    mean_matrix = y_true_1hot @ class_mean
    
    lower_bound = tf.subtract(min_label,y_true)
    lower_bound = tf.add(lower_bound,1)
    lower_bound = tf.multiply(lower_bound,1e9)
    # apply ReLU    
    lower_bound = tf.nn.relu(lower_bound)
    lower_bound = tf.add(margin, lower_bound)

    upper_bound = tf.subtract(y_true,max_label)
    upper_bound = tf.add(upper_bound,1)
    upper_bound = tf.multiply(upper_bound,1e9)
    # apply ReLU    
    upper_bound = tf.nn.relu(upper_bound)
    upper_bound = tf.add(margin, upper_bound)    

    upper_loss = tf.add(mean_matrix,upper_bound[:,tf.newaxis])
    upper_loss = tf.subtract(y_pred,upper_loss)
    # apply ReLU    
    upper_loss = tf.nn.relu(upper_loss)
    
    lower_loss = tf.add(lower_bound[:,tf.newaxis],y_pred)
    lower_loss = tf.subtract(mean_matrix,lower_loss)
    # apply ReLU    
    lower_loss = tf.nn.relu(lower_loss)
   
    hp_point_loss = tf.add(upper_loss, lower_loss)
    hp_point_loss = tf.reduce_mean(hp_point_loss)

    # aggregate ordering loss and point loss     
    mean_loss = tf.add(hp_point_loss,tf.multiply(ordering_loss_weight, (hp_ordering_loss)))
    
    return mean_loss

   
    """    
        References
        ----------
        .. [1] Vanderheyden, Bob and Ying Xie. Ordinal Hyperplane Loss. (2018). 
           2018 IEEE International Conference on Big Data (Big Data), 
           2018 IEEE International Conference On, 2337. https://doi-org.proxy.kennesaw.edu/10.1109/BigData.2018.8622079
    """

In [2]:
def hpall_sum_loss(y_true, y_pred, minlabel, maxlabel, margin=0.1, ordering_loss_weight=1):
    """ Evaluate the ordinal hyperplane ordering loss and point loss of the predictions y_pred\
        (using reduce sum).

        Parameters
        ----------
        y_true : array-like
        y_pred : array-like
        minlabel : integer
        maxlabel : integer
        margin : float
        ordering_loss_weight : float

        Returns
        -------
        loss: float
        A non-negative floating point value (best value is 0.0)
        
        Usage
        -------
        loss = hp_all_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
        print('Loss: ', loss.numpy()) # Loss: 3.48
        
        
        Usage with the `compile` API:
        
        ```python
        
        Example Keras wrapper for hp_all_loss:
        
        def get_ohpl_wrapper (min_label, max_label, margin, ordering_loss_weight):
            def ohpl(y_true, y_pred):
                return hpall_sum_loss(y_true, y_pred, min_label, max_label, margin, ordering_loss_weight)
            return ohpl

        loss = get_ohpl_wrapper(0,4,.3,0.1)
        
        model = tf.keras.Model(inputs, outputs)
        model.compile(loss=hp_all_loss, optimizer='adam', loss=ohpl_point_loss)
        ```
        
    """
    
    min_label = tf.constant(minlabel, dtype=tf.float32)
    max_label = tf.constant(maxlabel, dtype=tf.float32)
    margin = tf.constant(margin, dtype=tf.float32) # centroid margin
    ordering_loss_weight = tf.constant(ordering_loss_weight, dtype=tf.float32) 
    
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.dtypes.cast(y_true, y_pred.dtype)
    y_pred = tf.reshape(tf.transpose(y_pred),[-1,1])
    
    # OHPL ordering loss
    # one hot vector for y_true
    ords, idx = tf.unique(tf.reshape(y_true, [-1])) 
    num = tf.shape(ords)[0]
    y_true_1hot = tf.one_hot(idx, num)

    # mean distance for each class
    yO = tf.transpose(y_pred) @ y_true_1hot
    yc = tf.reduce_sum(y_true_1hot,0)
    class_mean = tf.divide(yO,yc)  

    # min. distance
    ords = tf.dtypes.cast(ords, tf.float32)
    ords0 = tf.reshape(ords, [-1,1])
    ords1 = tf.reshape(ords, [1,-1])
    
    min_distance = tf.subtract(ords0, ords1)
    # apply ReLU
    min_distance = tf.nn.relu (min_distance)
    
    # keeps min. distance
    keep = tf.minimum(min_distance,1)

    # distance to centroid     
    class_mean0 = tf.reshape(class_mean, [-1,1])
    class_mean1 = tf.reshape(class_mean, [1,-1])
    class_mean = tf.subtract(class_mean0, class_mean1)  
    # apply ReLU    
    class_mean = tf.nn.relu(class_mean)
    centroid_distance = tf.multiply(keep, class_mean)
    
    hp_ordering_loss = tf.subtract(min_distance,centroid_distance)
    # apply ReLU
    hp_ordering_loss = tf.nn.relu(hp_ordering_loss)
    hp_ordering_loss = tf.reduce_sum(hp_ordering_loss)

    
    # OHPL point loss
    # mean distance for each class
    yO = tf.transpose(y_pred) @ y_true_1hot
    yc = tf.reduce_sum(y_true_1hot,0)
    class_mean = tf.divide(yO,yc) 
 
    # mean by class
    class_mean = tf.reshape(class_mean,[-1,1])
    mean_matrix = y_true_1hot @ class_mean
    
    lower_bound = tf.subtract(min_label,y_true)
    lower_bound = tf.add(lower_bound,1)
    lower_bound = tf.multiply(lower_bound,1e9)
    # apply ReLU    
    lower_bound = tf.nn.relu(lower_bound)
    lower_bound = tf.add(margin, lower_bound)

    upper_bound = tf.subtract(y_true,max_label)
    upper_bound = tf.add(lower_bound,1)
    upper_bound = tf.multiply(lower_bound,1e9)
    # apply ReLU    
    upper_bound = tf.nn.relu(lower_bound)
    upper_bound = tf.add(margin, lower_bound)    

    upper_loss = tf.add(mean_matrix,upper_bound[:,tf.newaxis])
    upper_loss = tf.subtract(y_pred,upper_loss)
    # apply ReLU    
    upper_loss = tf.nn.relu(upper_loss)
    
    lower_loss = tf.add(lower_bound[:,tf.newaxis],mean_matrix)
    lower_loss = tf.subtract(y_pred,lower_loss)
    # apply ReLU    
    lower_loss = tf.nn.relu(lower_loss)
   
    hp_point_loss = tf.add(upper_loss, lower_loss)
    hp_point_loss = tf.reduce_sum(hp_point_loss)

    # aggregate ordering loss and point loss     
    sum_loss = tf.add(hp_point_loss,tf.multiply(ordering_loss_weight, hp_ordering_loss))
    
    return sum_loss


    """    
        References
        ----------
        .. [1] Vanderheyden, Bob and Ying Xie. Ordinal Hyperplane Loss. (2018). 
           2018 IEEE International Conference on Big Data (Big Data), 
           2018 IEEE International Conference On, 2337. https://doi-org.proxy.kennesaw.edu/10.1109/BigData.2018.8622079
    """

### Test the result:

In [3]:
loss = hpall_mean_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
print('Loss: ', loss.numpy()) # Loss: 0.7228571

Loss:  0.7228571


In [4]:
loss = hpall_sum_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
print('Loss: ', loss.numpy()) # Loss: 3.48

Loss:  3.48


### Example wrapper for Keras (mean loss):

In [25]:
# example Keras wrapper for hpall_mean_loss

def get_ohpl_wrapper (min_label, max_label, margin, ordering_loss_weight):
    def ohpl(y_true, y_pred):
        return hpall_mean_loss(y_true, y_pred, min_label, max_label, margin, ordering_loss_weight)
    return ohpl

loss = get_ohpl_wrapper(1,9,.3,0.1)

### Wrapper in action - Keras sequential model:

In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

pwd = !pwd
df = pd.read_csv(pwd[0]+'/datasets-arie_ben_david-era.csv', header=None, sep = ',')

X = df.iloc[:,:4]
y = df.iloc[:,4]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

model = Sequential()
model.add(Dense(40, activation='relu', input_shape=(4, )))
model.add(Dropout(0.1))
model.add(Dense(24, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(28, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1))

model.compile(loss=loss, optimizer="adam")
model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=2, batch_size=1)

Train on 670 samples, validate on 330 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7f6ecc538b50>

### Example wrapper for Keras (sum loss):

In [31]:
# example Keras wrapper for hpall_sum_loss

def get_ohpl_wrapper (min_label, max_label, margin, ordering_loss_weight):
    def ohpl(y_true, y_pred):
        return hpall_sum_loss(y_true, y_pred, min_label, max_label, margin, ordering_loss_weight)
    return ohpl

loss = get_ohpl_wrapper(1,9,.3,0.1)

### Wrapper in action - Keras sequential model:

In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

pwd = !pwd
df = pd.read_csv(pwd[0]+'/datasets-arie_ben_david-era.csv', header=None, sep = ',')

X = df.iloc[:,:4]
y = df.iloc[:,4]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

model = Sequential()
model.add(Dense(40, activation='relu', input_shape=(4, )))
model.add(Dropout(0.1))
model.add(Dense(24, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(28, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1))

model.compile(loss=loss, optimizer="adam")
model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=2, batch_size=1)

Train on 670 samples, validate on 330 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7f6ec6c58d90>

### Alternative Keras implementation using subclassing for mean loss

In [129]:
from tensorflow import keras

class OHPLall_mean(keras.losses.Loss):
    def __init__(self, minlabel, maxlabel, margin, ordering_loss_weight, **kwargs):
        self.minlabel = minlabel
        self.maxlabel = maxlabel
        self.margin = margin
        self.ordering_loss_weight = ordering_loss_weight
        super().__init__(**kwargs)
        
    def call(self, y_true, y_pred):
        min_label = tf.constant(self.minlabel, dtype=tf.float32)
        max_label = tf.constant(self.maxlabel, dtype=tf.float32)
        margin = tf.constant(self.margin, dtype=tf.float32) # centroid margin
        ordering_loss_weight = tf.constant(self.ordering_loss_weight, dtype=tf.float32) 

        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.dtypes.cast(y_true, y_pred.dtype)
        y_pred = tf.reshape(tf.transpose(y_pred),[-1,1])

        # OHPL ordering loss
        # one hot vector for y_true
        ords, idx = tf.unique(tf.reshape(y_true, [-1])) 
        num = tf.shape(ords)[0]
        y_true_1hot = tf.one_hot(idx, num)

        # mean distance for each class
        yO = tf.transpose(y_pred) @ y_true_1hot
        yc = tf.reduce_sum(y_true_1hot,0)
        class_mean = tf.divide(yO,yc)  

        # min. distance
        ords = tf.dtypes.cast(ords, tf.float32)
        ords0 = tf.reshape(ords, [-1,1])
        ords1 = tf.reshape(ords, [1,-1])

        min_distance = tf.subtract(ords0, ords1)
        # apply ReLU
        min_distance = tf.nn.relu (min_distance)

        # keeps min. distance
        keep = tf.minimum(min_distance,1)

        # distance to centroid     
        class_mean0 = tf.reshape(class_mean, [-1,1])
        class_mean1 = tf.reshape(class_mean, [1,-1])
        class_mean = tf.subtract(class_mean0, class_mean1)  
        # apply ReLU    
        class_mean = tf.nn.relu(class_mean)
        centroid_distance = tf.multiply(keep, class_mean)

        hp_ordering_loss = tf.subtract(min_distance,centroid_distance)
        # apply ReLU
        hp_ordering_loss = tf.nn.relu(hp_ordering_loss)
        hp_ordering_loss = tf.reduce_sum(hp_ordering_loss)


        # OHPL point loss
        # mean distance for each class
        yO = tf.transpose(y_pred) @ y_true_1hot
        yc = tf.reduce_sum(y_true_1hot,0)
        class_mean = tf.divide(yO,yc) 

        # mean by class
        class_mean = tf.reshape(class_mean,[-1,1])
        mean_matrix = y_true_1hot @ class_mean

        lower_bound = tf.subtract(min_label,y_true)
        lower_bound = tf.add(lower_bound,1)
        lower_bound = tf.multiply(lower_bound,1e9)
        # apply ReLU    
        lower_bound = tf.nn.relu(lower_bound)
        lower_bound = tf.add(margin, lower_bound)

        upper_bound = tf.subtract(y_true,max_label)
        upper_bound = tf.add(upper_bound,1)
        upper_bound = tf.multiply(upper_bound,1e9)
        # apply ReLU    
        upper_bound = tf.nn.relu(upper_bound)
        upper_bound = tf.add(margin, upper_bound)    

        upper_loss = tf.add(mean_matrix,upper_bound[:,tf.newaxis])
        upper_loss = tf.subtract(y_pred,upper_loss)
        # apply ReLU    
        upper_loss = tf.nn.relu(upper_loss)

        lower_loss = tf.add(lower_bound[:,tf.newaxis],y_pred)
        lower_loss = tf.subtract(mean_matrix,lower_loss)
        # apply ReLU    
        lower_loss = tf.nn.relu(lower_loss)

        hp_point_loss = tf.add(upper_loss, lower_loss)
        hp_point_loss = tf.reduce_mean(hp_point_loss) # for sum loss, replace with tf.reduce_sum()

        # aggregate ordering loss and point loss     
        mean_loss = tf.add(hp_point_loss,tf.multiply(ordering_loss_weight, (hp_ordering_loss)))

        return mean_loss


### Subclassing in action - Keras sequential model:

In [130]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

pwd = !pwd
df = pd.read_csv(pwd[0]+'/datasets-arie_ben_david-era.csv', header=None, sep = ',')

X = df.iloc[:,:4]
y = df.iloc[:,4]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

model = Sequential()
model.add(Dense(40, activation='relu', input_shape=(4, )))
model.add(Dropout(0.1))
model.add(Dense(24, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(28, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1))

model.compile(loss=OHPLall_mean(1,9,0.1,1), optimizer="adam")
model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=2, batch_size=1)

Train on 670 samples, validate on 330 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7f2773713b90>