In [49]:
from pandas import read_csv
from sklearn.metrics import accuracy_score, precision_score, recall_score
from math import log, exp

# Accuracy, precision, recall - multi-class classification

In [50]:
def evalClassification(real_labels, computed_labels, label_names):
    """
    Compute the accuracy, precision and recall for a classification problem.
    input:  real_labels - list of all real labels; computed_labels - list of all computed labels;
            label_names - list of all class labels
    output: the accuracy (a single value), followed by the precision and the recall;
            precision and recall are computed with average = None, i.e. one score per
            entry of label_names, in the same order (index them to get a class' score)
    """
    acc = accuracy_score(real_labels, computed_labels)
    # average = None keeps the scores separated per class instead of collapsing them
    precision = precision_score(real_labels, computed_labels, average = None, labels = label_names)
    recall = recall_score(real_labels, computed_labels, average = None, labels = label_names)
    return acc, precision, recall

In [51]:
# Load the real and the predicted flower types and evaluate the predictions.
data = read_csv('data/flowers.csv')

real_labels = list(data.Type)
computed_labels = list(data.PredictedType)

accuracy, precision, recall = evalClassification(real_labels, computed_labels, ['Rose', 'Tulip', 'Daisy'])

print('acuratete:', accuracy)
# The scores come back in the order of the class list passed above.
for class_index, (precision_text, recall_text) in enumerate([
    ('precizie rose:', 'rapel rose:'),
    ('precizie tulip:', 'rapel tulip:'),
    ('precizie daisy:', 'rapel daisy:'),
]):
    print(precision_text, precision[class_index])
    print(recall_text, recall[class_index])

acuratete: 0.3076923076923077
precizie rose: 0.25
rapel rose: 0.25
precizie tulip: 0.2857142857142857
rapel tulip: 0.2857142857142857
precizie daisy: 0.36363636363636365
rapel daisy: 0.36363636363636365


# Cross-entropy loss - binary-classification problem

In [52]:
def cross_entropy_loss_binary(real_labels, computed_outputs, positive_label='spam'):
    """
    Calculates the cross-entropy loss for a binary-classification problem.
    input: real_labels - list of all real labels;
           computed_outputs - list of tuples of 2 subunitary values with sum = 1; position 0 is the
                              probability of positive_label, position 1 the probability of the other class
           positive_label - the label mapped to position 0 (any other label is mapped to position 1)
    output: cross-entropy loss value (mean over all samples)
    raises: ZeroDivisionError if real_labels is empty; ValueError (from log) if the probability
            computed for the real class of a sample is 0
    """
    dataset_ce = 0.0
    for i, label in enumerate(real_labels):
        # Only the probability of the real class contributes to the loss. The number of classes is
        # always 2, regardless of how many distinct labels happen to appear in real_labels.
        true_index = 0 if label == positive_label else 1
        dataset_ce -= log(computed_outputs[i][true_index])
    return dataset_ce / len(real_labels)

In [53]:
# Each sample pairs the real label with the computed [P(spam), P(ham)] output;
# the pairs are then transposed into the two parallel lists used by the loss.
real_labels, computed_outputs = (list(column) for column in zip(*[
    ('spam', [0.7, 0.3]),
    ('spam', [0.2, 0.8]),
    ('ham', [0.4, 0.6]),
    ('ham', [0.9, 0.1]),
    ('spam', [0.7, 0.3]),
    ('ham', [0.4, 0.6]),
    ('spam', [0.9, 0.1]),
    ('ham', [0.2, 0.8]),
    ('spam', [0.8, 0.2]),
    ('spam', [0.6, 0.4]),
]))

print('CE Loss:', cross_entropy_loss_binary(real_labels, computed_outputs))

CE Loss: 0.6709497382889827


# Softmax cross-entropy loss - multi-class classification problems

In [54]:
def cross_entropy_loss_multi_class(real_labels, raw_outputs):
    """
    Calculates the softmax cross-entropy loss for a multi-class classification problem.
    input: real_labels - list of tuples containing binary values, only one being 1;
           raw_outputs - list of tuples of real values (one raw score per class)
    output: softmax cross-entropy value, summed (not averaged) over all samples
    """
    ce = 0.0
    for i, targets in enumerate(real_labels):
        if len(targets) == 0:
            # a sample without classes contributes nothing to the loss
            continue
        scores = raw_outputs[i]
        # log-sum-exp trick: shifting by the largest score keeps exp() from overflowing and
        # gives log(softmax) directly, so a probability that underflows to 0 never reaches log()
        peak = max(scores)
        log_norm = peak + log(sum(exp(val - peak) for val in scores))
        ce -= sum(target * (scores[j] - log_norm) for j, target in enumerate(targets))
    return ce

In [55]:
# Each sample pairs the one-hot real label with the raw scores of the 5 classes;
# the pairs are then transposed into the two parallel lists used by the loss.
real_labels, raw_outputs = (list(column) for column in zip(*[
    ([1, 0, 0, 0, 0], [10, 1.4, -5.2, 7.6, -4.2]),
    ([0, 1, 0, 0, 0], [-2, 6, 3.4, 5, 1]),
    ([1, 0, 0, 0, 0], [1.2, 2.2, 1, 5.4, -7.8]),
    ([0, 0, 1, 0, 0], [-3, 5, 3, 5, -6]),
    ([1, 0, 0, 0, 0], [-2.4, 4, 4.2, 15, -5]),
    ([0, 0, 0, 0, 1], [6, 4, -3, 2.3, 7.6]),
]))
print('Softmax CE:', cross_entropy_loss_multi_class(real_labels, raw_outputs))

Softmax CE: 25.093136536679886


# Sigmoid cross-entropy loss - multi-label classification problems

In [56]:
def _log_sigmoid(val):
    # log(1 / (1 + exp(-val))) written so that exp() only ever receives a non-positive argument
    return min(val, 0.0) - log(1 + exp(-abs(val)))


def cross_entropy_loss_multi_label(real_labels, raw_outputs, include_negative_term=False):
    """
    Calculates the sigmoid cross-entropy loss for multi-label classification problem
    input: real_labels - list of tuples containing binary values; raw_outputs - list of tuples of real values
           include_negative_term - when False (default) only the labels that are 1 contribute, through
                                   -log(sigmoid(raw value)); when True the labels that are 0 are
                                   penalised as well, through -log(1 - sigmoid(raw value))
    output: sigmoid cross-entropy value, summed (not averaged) over all samples and labels
    """
    ce = 0.0
    for i, targets in enumerate(real_labels):
        scores = raw_outputs[i]
        for j, target in enumerate(targets):
            ce -= target * _log_sigmoid(scores[j])
            if include_negative_term:
                # log(1 - sigmoid(x)) == log(sigmoid(-x))
                ce -= (1 - target) * _log_sigmoid(-scores[j])
    return ce

In [57]:
# Each sample pairs the binary real labels with the raw scores of the 5 labels;
# the pairs are then transposed into the two parallel lists used by the loss.
real_labels, raw_outputs = (list(column) for column in zip(*[
    ([0, 0, 1, 0, 1], [5, 1.4, -5.2, 7.6, -4.2]),
    ([0, 1, 1, 0, 0], [-2, 6, 3.4, 5, 1]),
    ([1, 0, 1, 0, 0], [1.2, 2.2, 1, 5.4, -7.8]),
    ([0, 0, 1, 1, 0], [-3, 5, 3, 5, -6]),
    ([1, 1, 1, 0, 0], [-2.4, 4, 4.2, 15, -5]),
    ([0, 0, 0, 1, 0], [6, 4, -3, 2.3, 7.6]),
]))

print('Sigmoid CE:', cross_entropy_loss_multi_label(real_labels, raw_outputs))

Sigmoid CE: 12.702952468404927
