### Define Loss Functions

In [1]:
"""Metrics to assess performance on ordinal classification task given class prediction
   using hyper plane loss techniques 
"""

# Authors: Bob Vanderheyden <rvanderh@us.ibm.com>
#          Ying Xie <yxie2@kennesaw.edu>
#         
# Contributor: Shayan Shamskolahi

import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
import numpy as np

def hpall_mean_loss(y_true, y_pred, minlabel, maxlabel, margin=0.1, ordering_loss_weight=1):
    """ Evaluate the ordinal hyperplane ordering loss and point loss of the predictions y_pred\
        (using reduce mean).

        Parameters
        ----------
        y_true : array-like
        y_pred : array-like
        minlabel : integer
        maxlabel : integer
        margin : float
        ordering_loss_weight : float

        Returns
        -------
        loss: float
        A non-negative floating point value (best value is 0.0)
        
        Usage
        -------
        loss = hp_all_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
        print('Loss: ', loss.numpy()) # Loss: 0.7228571
        
        
        Usage with the `compile` API:
        
        ```python
        
        Example Keras wrapper for hp_all_loss:
        
        def get_ohpl_wrapper (min_label, max_label, margin, ordering_loss_weight):
            def ohpl(y_true, y_pred):
                return hpall_mean_loss(y_true, y_pred, min_label, max_label, margin, ordering_loss_weight)
            return ohpl

        loss = get_ohpl_wrapper(2,7,.3,1) # ordering_loss_weight must not be less that 1
        
        model = tf.keras.Model(inputs, outputs)
        model.compile(loss=hp_all_loss, optimizer='adam', loss=ohpl_point_loss)
        ```
        
    """
    
    min_label = tf.constant(minlabel, dtype=tf.float32)
    max_label = tf.constant(maxlabel, dtype=tf.float32)
    margin = tf.constant(margin, dtype=tf.float32) # centroid margin
    ordering_loss_weight = tf.constant(ordering_loss_weight, dtype=tf.float32) 
    
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.dtypes.cast(y_true, y_pred.dtype)
    y_pred = tf.reshape(tf.transpose(y_pred),[-1,1])
    
    # OHPL ordering loss
    # one hot vector for y_true
    ords, idx = tf.unique(tf.reshape(y_true, [-1])) 
    num = tf.shape(ords)[0]
    y_true_1hot = tf.one_hot(idx, num)

    # mean distance for each class
    yO = tf.matmul(tf.transpose(y_pred),y_true_1hot)
    yc = tf.reduce_sum(y_true_1hot,0)
    class_mean = tf.divide(yO,yc)  

    # min. distance
    ords = tf.dtypes.cast(ords, tf.float32)
    ords0 = tf.reshape(ords, [-1,1])
    ords1 = tf.reshape(ords, [1,-1])
    
    min_distance = tf.subtract(ords0, ords1)
    # apply ReLU
    min_distance = tf.nn.relu (min_distance)
    
    # keeps min. distance
    keep = tf.minimum(min_distance,1)

    # distance to centroid     
    class_mean0 = tf.reshape(class_mean, [-1,1])
    class_mean1 = tf.reshape(class_mean, [1,-1])
    class_mean = tf.subtract(class_mean0, class_mean1)  
    # apply ReLU    
    class_mean = tf.nn.relu(class_mean)
    centroid_distance = tf.multiply(keep, class_mean)
    
    hp_ordering_loss = tf.subtract(min_distance,centroid_distance)
    # apply ReLU
    hp_ordering_loss = tf.nn.relu(hp_ordering_loss)
    hp_ordering_loss = tf.reduce_sum(hp_ordering_loss)
    
    # OHPL point loss
    # Centroid for point
    point_cent = tf.matmul(y_true_1hot, class_mean0)
    
    lower_bound = tf.subtract(min_label,y_true)
    lower_bound = tf.add(lower_bound,1)
    lower_bound = tf.multiply(lower_bound,1e9)
    # apply ReLU    
    lower_bound = tf.nn.relu(lower_bound)
    lower_bound = tf.add(margin, lower_bound)

    upper_bound = tf.subtract(y_true,max_label)
    upper_bound = tf.add(upper_bound,1)
    upper_bound = tf.multiply(upper_bound,1e9)
    # apply ReLU    
    upper_bound = tf.nn.relu(upper_bound)
    upper_bound = tf.add(margin, upper_bound)    

    upper_loss = tf.add(point_cent,upper_bound[:,tf.newaxis])
    upper_loss = tf.subtract(y_pred,upper_loss)
    # apply ReLU    
    upper_loss = tf.nn.relu(upper_loss)
    
    lower_loss = tf.add(lower_bound[:,tf.newaxis],y_pred)
    lower_loss = tf.subtract(point_cent,lower_loss)
    # apply ReLU    
    lower_loss = tf.nn.relu(lower_loss)
   
    hp_point_loss = tf.add(upper_loss, lower_loss)
    hp_point_loss = tf.reduce_mean(hp_point_loss)

    # aggregate ordering loss and point loss     
    mean_loss = tf.add(hp_point_loss,tf.multiply(ordering_loss_weight, hp_ordering_loss))
    
    return mean_loss

   
    """    
        References
        ----------
        .. [1] Vanderheyden, Bob and Ying Xie. Ordinal Hyperplane Loss. (2018). 
           2018 IEEE International Conference on Big Data (Big Data), 
           2018 IEEE International Conference On, 2337. https://doi-org.proxy.kennesaw.edu/10.1109/BigData.2018.8622079
    """

In [2]:
def hpall_sum_loss(y_true, y_pred, minlabel, maxlabel, margin=0.1, ordering_loss_weight=1):
    """ Evaluate the ordinal hyperplane ordering loss and point loss of the predictions y_pred\
        (using reduce sum).

        Parameters
        ----------
        y_true : array-like
        y_pred : array-like
        minlabel : integer
        maxlabel : integer
        margin : float
        ordering_loss_weight : float

        Returns
        -------
        loss: float
        A non-negative floating point value (best value is 0.0)
        
        Usage
        -------
        loss = hp_all_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
        print('Loss: ', loss.numpy()) # Loss: 3.48
        
        
        Usage with the `compile` API:
        
        ```python
        
        Example Keras wrapper for hp_all_loss:
        
        def get_ohpl_wrapper (min_label, max_label, margin, ordering_loss_weight):
            def ohpl(y_true, y_pred):
                return hpall_sum_loss(y_true, y_pred, min_label, max_label, margin, ordering_loss_weight)
            return ohpl

        loss = get_ohpl_wrapper(0,4,1,1)
        
        model = tf.keras.Model(inputs, outputs)
        model.compile(loss=hp_all_loss, optimizer='adam', loss=ohpl_point_loss)
        ```
        
    """
    
    min_label = tf.constant(minlabel, dtype=tf.float32)
    max_label = tf.constant(maxlabel, dtype=tf.float32)
    margin = tf.constant(margin, dtype=tf.float32) # centroid margin
    ordering_loss_weight = tf.constant(ordering_loss_weight, dtype=tf.float32) 
    
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.dtypes.cast(y_true, y_pred.dtype)
    y_pred = tf.reshape(tf.transpose(y_pred),[-1,1])
    
    # OHPL ordering loss
    # one hot vector for y_true
    ords, idx = tf.unique(tf.reshape(y_true, [-1])) 
    num = tf.shape(ords)[0]
    y_true_1hot = tf.one_hot(idx, num)

    # mean distance for each class
    yO = tf.matmul(tf.transpose(y_pred),y_true_1hot)
    yc = tf.reduce_sum(y_true_1hot,0)
    class_mean = tf.divide(yO,yc)  

    # min. distance
    ords = tf.dtypes.cast(ords, tf.float32)
    ords0 = tf.reshape(ords, [-1,1])
    ords1 = tf.reshape(ords, [1,-1])
    
    min_distance = tf.subtract(ords0, ords1)
    # apply ReLU
    min_distance = tf.nn.relu (min_distance)
    
    # keeps min. distance
    keep = tf.minimum(min_distance,1)

    # distance to centroid     
    class_mean0 = tf.reshape(class_mean, [-1,1])
    class_mean1 = tf.reshape(class_mean, [1,-1])
    class_mean = tf.subtract(class_mean0, class_mean1)  
    # apply ReLU    
    class_mean = tf.nn.relu(class_mean)
    centroid_distance = tf.multiply(keep, class_mean)
    
    hp_ordering_loss = tf.subtract(min_distance,centroid_distance)
    # apply ReLU
    hp_ordering_loss = tf.nn.relu(hp_ordering_loss)
    hp_ordering_loss = tf.reduce_sum(hp_ordering_loss)
    
    # OHPL point loss
    # Centroid for point
    point_cent = tf.matmul(y_true_1hot, class_mean0)
    
    lower_bound = tf.subtract(min_label,y_true)
    lower_bound = tf.add(lower_bound,1)
    lower_bound = tf.multiply(lower_bound,1e9)
    # apply ReLU    
    lower_bound = tf.nn.relu(lower_bound)
    lower_bound = tf.add(margin, lower_bound)

    upper_bound = tf.subtract(y_true,max_label)
    upper_bound = tf.add(upper_bound,1)
    upper_bound = tf.multiply(upper_bound,1e9)
    # apply ReLU    
    upper_bound = tf.nn.relu(upper_bound)
    upper_bound = tf.add(margin, upper_bound)    

    upper_loss = tf.add(point_cent,upper_bound[:,tf.newaxis])
    upper_loss = tf.subtract(y_pred,upper_loss)
    # apply ReLU    
    upper_loss = tf.nn.relu(upper_loss)
    
    lower_loss = tf.add(lower_bound[:,tf.newaxis],y_pred)
    lower_loss = tf.subtract(point_cent,lower_loss)
    # apply ReLU    
    lower_loss = tf.nn.relu(lower_loss)
   
    hp_point_loss = tf.add(upper_loss, lower_loss)
    hp_point_loss = tf.reduce_sum(hp_point_loss)

    # aggregate ordering loss and point loss     
    sum_loss = tf.add(hp_point_loss,tf.multiply(ordering_loss_weight, hp_ordering_loss))
    
    return sum_loss


    """    
        References
        ----------
        .. [1] Vanderheyden, Bob and Ying Xie. Ordinal Hyperplane Loss. (2018). 
           2018 IEEE International Conference on Big Data (Big Data), 
           2018 IEEE International Conference On, 2337. https://doi-org.proxy.kennesaw.edu/10.1109/BigData.2018.8622079
    """

### Test the result:

In [3]:
loss = hpall_mean_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
print('Loss: ', loss.numpy()) # Loss: 0.7228571

Loss:  0.7228571


In [4]:
loss = hpall_sum_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
print('Loss: ', loss.numpy()) # Loss: 3.48

Loss:  3.48


### Application in Keras (mean loss):

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [6]:
# prepare the data
pwd = !pwd
df = pd.read_csv('world_happiness_2015_2019.csv')
df.Score = df.Score.astype('int32')
df.drop(['Year'], axis=1, inplace=True)
df = df.dropna()
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 781 entries, 0 to 781
Data columns (total 7 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Score                         781 non-null    int32  
 1   GDP per capita                781 non-null    float64
 2   Social support                781 non-null    float64
 3   Healthy life expectancy       781 non-null    float64
 4   Freedom to make life choices  781 non-null    float64
 5   Generosity                    781 non-null    float64
 6   Perceptions of corruption     781 non-null    float64
dtypes: float64(6), int32(1)
memory usage: 45.8 KB


In [50]:
X = df.iloc[:,1:]
y = df.iloc[:,0]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [8]:
np.unique(y, return_counts=True)

(array([2, 3, 4, 5, 6, 7], dtype=int32), array([  7,  89, 202, 249, 162,  72]))

### Wrapper in action - Keras sequential model:

In [9]:
# example Keras wrapper for hpall_mean_loss

def get_ohpl_wrapper (min_label, max_label, margin, ordering_loss_weight):
    def ohpl(y_true, y_pred):
        return hpall_mean_loss(y_true, y_pred, min_label, max_label, margin, ordering_loss_weight)
    return ohpl

loss = get_ohpl_wrapper(2,7,1,1) # ordering_loss_weight must not be less that 1

In [44]:
# Define and compile the model 

model = Sequential()
model.add(Dense(25, activation='relu', input_shape=(6, )))
model.add(Dropout(0.1))
model.add(Dense(30, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(6, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1))

model.compile(loss=loss, optimizer="adam")
model.fit(X_train, y_train, epochs=50, batch_size=5)

Train on 523 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7fccc51eb210>

In [51]:
# Define the minimum class
min_class = min(y_train.unique())
y_train = np.array(y_train)

In [52]:
# Create matrix from on hot encoded training labels to use to calculate class centroids
onehot_encoder = OneHotEncoder(sparse=False, categories='auto')
onehot = onehot_encoder.fit_transform(y_train.reshape((-1, 1)))
onehot_inverse = 1/np.sum((onehot.T), axis=1)
new_y_train = onehot.T*onehot_inverse.reshape(-1,1)

In [53]:
# Score the training set
pred = model.predict(X_train, batch_size=5)

In [54]:
# Multiply centroid calculation matrix, new_y_train, by training set scores
train_cent = np.matmul(new_y_train, pred)

In [55]:
# Calculate new data model score
new_pred = model.predict(X_test)

In [56]:
# Identify the closest centroid
rcenter = train_cent.T # create row matrix of centroids
y_pred = np.argmin(abs(new_pred - rcenter), axis=1) + min_class      

In [57]:
# calculate the mean absolute error and mean zero one error
mae = np.mean(abs(y_pred - y_test))
mze = np.mean(abs(y_pred - y_test) > 0)   
print(mae, mze)

0.49224806201550386 0.45348837209302323


In [58]:
# Confusion matrix
from sklearn.metrics import confusion_matrix 
confusion_matrix(y_test, y_pred) 

array([[ 0,  0,  0,  0,  0,  0],
       [ 3, 17,  4,  3,  0,  0],
       [ 2, 15, 29, 19,  0,  0],
       [ 0,  1,  6, 55, 19,  4],
       [ 0,  0,  0, 14, 20, 21],
       [ 0,  0,  0,  0,  6, 20]])