In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
# Ground-truth labels as a column vector: three positive examples, one negative.
y_true = np.array([[1], [1], [1], [0]])

print(f"y_true:\n{y_true}")

y_true:
[[1]
 [1]
 [1]
 [0]]


### Two models
To better understand the loss function, you will pretend that you have two models.
- Model 1 always outputs a 0.9 for any example that it's given.  
- Model 2 always outputs a 0.1 for any example that it's given.

In [5]:
# Constant predictions with the same shape as the labels:
# model 1 answers 0.9 for every example, model 2 answers 0.1.
y_pred_1 = np.full(y_true.shape, 0.9)
y_pred_2 = np.full(y_true.shape, 0.1)

In [6]:
# Show model 1's predictions (the cell's last expression is displayed).
y_pred_1

array([[0.9],
       [0.9],
       [0.9],
       [0.9]])

In [7]:
# Show model 2's predictions (the cell's last expression is displayed).
y_pred_2

array([[0.1],
       [0.1],
       [0.1],
       [0.1]])

### Problems with the regular loss function
The learning goal here is to notice that with a regular loss function (not a weighted loss), the model that always outputs 0.9 has a smaller loss (performs better) than model 2.
- This is because there is a class imbalance, where 3 out of the 4 labels are 1.
- If the data were perfectly balanced (two labels were 1 and two labels were 0), model 1 and model 2 would have the same loss.  Each would get two examples correct and two examples incorrect.
- However, since the data is not balanced, the regular loss function implies that model 1 is better than model 2.

### Notice the shortcomings of a regular non-weighted loss

Compute the regular (unweighted) loss for each of the two models (model 1 always predicts 0.9, and model 2 always predicts 0.1) and compare the results.

In [10]:
# Unweighted binary cross-entropy for model 1, summed over all examples:
# the first sum covers the positive labels, the second the negative labels.
loss_reg_1 = -(
    np.sum(y_true * np.log(y_pred_1))
    + np.sum((1 - y_true) * np.log(1 - y_pred_1))
)
loss_reg_1

2.618666639967525

In [12]:
# Unweighted binary cross-entropy for model 2, summed over all examples:
# the first sum covers the positive labels, the second the negative labels.
loss_reg_2 = -(
    np.sum(y_true * np.log(y_pred_2))
    + np.sum((1 - y_true) * np.log(1 - y_pred_2))
)
loss_reg_2

7.013115794639963

In [13]:
# Class weights derived from the labels instead of being hard-coded, so they
# stay correct if `y_true` changes:
#   w_p (applied to positive examples) = fraction of labels equal to 0
#   w_n (applied to negative examples) = fraction of labels equal to 1
# The rarer class therefore receives the larger weight. For labels
# [1, 1, 1, 0] these evaluate to 0.25 and 0.75.
w_p = np.mean(y_true == 0)
w_n = np.mean(y_true == 1)

In [17]:
# Weighted loss for model 1: positive terms are scaled by w_p and negative
# terms by w_n before summing.
# NOTE(review): this rebinds `loss_reg_1`, replacing the unweighted value
# computed earlier under the same name.
loss_reg_1 = -(
    np.sum(y_true * w_p * np.log(y_pred_1))
    + np.sum((1 - y_true) * w_n * np.log(1 - y_pred_1))
)
loss_reg_1

1.805959206488904

In [18]:
# Weighted loss for model 2: positive terms are scaled by w_p and negative
# terms by w_n before summing.
# NOTE(review): this rebinds `loss_reg_2`, replacing the unweighted value
# computed earlier under the same name.
loss_reg_2 = -(
    np.sum(y_true * w_p * np.log(y_pred_2))
    + np.sum((1 - y_true) * w_n * np.log(1 - y_pred_2))
)

In [19]:
# Show the current value of loss_reg_2 (the cell's last expression is displayed).
loss_reg_2

1.8059592064889038

In [20]:
# Labels for five examples and two classes (one column per class).
# This rebinds `y_true`, replacing the earlier single-column labels.
y_true = np.array([
    [1, 0],
    [1, 0],
    [1, 0],
    [0, 1],
    [1, 0],
])

In [21]:
# Show the label matrix (the cell's last expression is displayed).
y_true

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [1, 0]])

In [23]:
# Sum down the rows (axis 0): one total per column, i.e. the number of 1-labels per class.
y_true.sum(axis=0)

array([4, 1])

In [24]:
# Sum across the columns (axis 1): one total per row, i.e. the number of 1-labels per example.
y_true.sum(axis=1)

array([1, 1, 1, 1, 1])

In [26]:
# Positive weight per class: the fraction of examples whose label in that
# column is 0 (count of zeros per column divided by the number of rows).
w_p = np.count_nonzero(y_true == 0, axis=0) / len(y_true)
w_p

array([0.2, 0.8])

In [28]:
# Negative weight per class: the fraction of examples whose label in that
# column is 1 (count of ones per column divided by the number of rows).
w_n = np.count_nonzero(y_true == 1, axis=0) / len(y_true)
w_n

array([0.8, 0.2])

In [29]:
# Constant predictions shaped like the labels: start from all ones, then
# scale column 0 to 0.3 and column 1 to 0.7.
y_pred = np.ones(y_true.shape)
y_pred[:, 0] *= 0.3
y_pred[:, 1] *= 0.7

In [30]:
# Show the prediction matrix (the cell's last expression is displayed).
y_pred

array([[0.3, 0.7],
       [0.3, 0.7],
       [0.3, 0.7],
       [0.3, 0.7],
       [0.3, 0.7]])

In [34]:
# Positive-label part of the weighted loss for class 0 (column 0):
# only rows where the label is 1 contribute, each scaled by w_p[0].
loss_0_pos = -np.sum(w_p[0] * y_true[:, 0] * np.log(y_pred[:, 0]))
loss_0_pos

0.963178243460749

In [35]:
# Negative-label part of the weighted loss for class 0 (column 0):
# only rows where the label is 0 contribute, each scaled by w_n[0].
loss_0_neg = -np.sum(w_n[0] * (1 - y_true[:, 0]) * np.log(1 - y_pred[:, 0]))
loss_0_neg

0.285339955150986

In [36]:
# Total weighted loss for class 0: positive-label part plus negative-label part.
loss_0 = loss_0_pos + loss_0_neg

In [37]:
# Show the total weighted loss for class 0 (the cell's last expression is displayed).
loss_0

1.2485181986117349

In [11]:
# Two examples, both labelled positive, as a column vector.
# This rebinds `y_true` once more.
y_true = np.array([[1], [1]])
y_true

array([[1],
       [1]])

In [12]:
# Predicted probabilities for the two examples, as a column vector.
y_pred = np.array([[0.6], [0.3]])
y_pred

array([[0.6],
       [0.3]])

In [13]:
# Weighted loss with fixed weights: 1/3 on the positive-label term and 2/3 on
# the negative-label term.
# NOTE(review): for y_true = [[1], [1]] and y_pred = [[0.6], [0.3]] the
# negative term is zero and this evaluates to about 0.5716, i.e. one third of
# -(log 0.6 + log 0.3) ~= 1.7148. A recorded output of 1.7148 would match the
# unweighted sum, so re-run this cell to confirm the displayed value.
loss = -(
    np.sum(1/3 * y_true * np.log(y_pred))
    + np.sum(2/3 * (1 - y_true) * np.log(1 - y_pred))
)

In [14]:
# Show the weighted loss (the cell's last expression is displayed).
loss

1.7147984280919268