In [None]:
import numpy as np
from sklearn.metrics import mean_squared_log_error

In [None]:
def rmsle(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Root mean squared logarithmic error between targets and predictions.

    Computes ``sqrt(mean((log1p(y_pred) - log1p(y_true)) ** 2))`` over all
    elements.

    Args:
        y_true: Ground-truth values; any array-like of non-negative numbers.
        y_pred: Predicted values; any array-like of non-negative numbers that
            broadcasts against ``y_true``.

    Returns:
        The RMSLE as a plain Python float.

    Raises:
        ValueError: If either input contains a negative value.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Reject negatives up front instead of letting log1p silently produce
    # NaN/-inf (values <= -1) or a score with no meaningful interpretation.
    if np.any(y_true < 0) or np.any(y_pred < 0):
        raise ValueError("rmsle is only defined for non-negative values")
    log_error = np.log1p(y_pred) - np.log1p(y_true)
    return float(np.sqrt(np.mean(log_error ** 2)))


def rmse(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Root mean squared error between targets and predictions.

    Computes ``sqrt(mean((y_pred - y_true) ** 2))`` over all elements.

    Args:
        y_true: Ground-truth values; any numeric array-like.
        y_pred: Predicted values; any numeric array-like that broadcasts
            against ``y_true``.

    Returns:
        The RMSE as a plain Python float.
    """
    # Work in floating point: plain lists do not support subtraction, and
    # small or unsigned integer dtypes wrap around when subtracted or squared.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    error = y_pred - y_true
    return float(np.sqrt(np.mean(error ** 2)))

In [None]:
# Sanity check: when predictions equal the targets the error is zero.
n_samples = 100
y_true = np.ones(n_samples)
y_pred = np.ones_like(y_true)

rmsle(y_true=y_true, y_pred=y_pred)

In [None]:
n_samples = 1000
# Use a seeded generator so the displayed value is the same on every run.
rng = np.random.default_rng(seed=42)
y_true = rng.normal(loc=0, scale=1, size=n_samples)
y_pred = rng.normal(loc=5, scale=2, size=n_samples)
# np.abs keeps both inputs non-negative so that log1p is well defined.
rmsle(np.abs(y_true), np.abs(y_pred))

In [None]:
n_samples = 1000

# Case 1: identical inputs -- both implementations must report zero error.
y_true = np.ones(shape=n_samples)
y_pred = np.ones(shape=n_samples)
expected = mean_squared_log_error(y_true, y_pred) ** 0.5
# Compare with a tolerance rather than `==`: the two implementations may
# reduce and take the square root differently, so results can differ in the
# last bits.
np.testing.assert_allclose(rmsle(y_true, y_pred), expected)


# Case 2: random non-negative inputs, seeded so the check is reproducible.
rng = np.random.default_rng(seed=0)
y_true = rng.normal(loc=0, scale=1, size=n_samples)
y_pred = rng.normal(loc=5, scale=2, size=n_samples)
expected = mean_squared_log_error(np.abs(y_true), np.abs(y_pred)) ** 0.5
np.testing.assert_allclose(rmsle(np.abs(y_true), np.abs(y_pred)), expected)

### Outliers

Compared to RMSE, RMSLE is less sensitive to extreme / outlier errors.

In [None]:
# Baseline: every prediction is close to its target.
y_true = np.array([10, 20, 30])
y_pred = np.array([11, 18, 35])
print(f"RMSE no outliers: {rmse(y_true, y_pred):.3f}")
print(f"RMSLE no outliers: {rmsle(y_true, y_pred):.3f}")

print("======")

# Same data plus one wildly wrong prediction (400 for a target of 40).
y_true = np.array([10, 20, 30, 40])
y_pred = np.array([11, 18, 35, 400])
print(f"RMSE with outlier: {rmse(y_true, y_pred):.3f}")
print(f"RMSLE with outlier: {rmsle(y_true, y_pred):.3f}")

### Relative Error

Compared to RMSE, RMSLE is less sensitive to differences in the scale of the error. For RMSLE, predicting 1.01 for a true value of 1 incurs a similar error to predicting 101 for a true value of 100. For RMSE, on the other hand, the latter is 100x as bad.

In [None]:
# A single 10% over-prediction on a target of 1.
rmsle(y_true=np.array([1.0]), y_pred=np.array([1.1]))

In [None]:
# Every prediction overshoots its target by 10%.
y_true = np.array([10, 20, 30])
y_pred = np.multiply(y_true, 1.1)
print(
    f"RMSE: {rmse(y_true, y_pred):.3f}",
    f"RMSLE: {rmsle(y_true, y_pred):.3f}",
    sep="\n",
)

print("======")

# Same 10% overshoot after scaling the targets up by a factor of 1000.
y_true = np.multiply(y_true, 1000)
y_pred = np.multiply(y_true, 1.1)
print(
    f"RMSE x1000: {rmse(y_true, y_pred):.3f}",
    f"RMSLE x1000: {rmsle(y_true, y_pred):.3f}",
    sep="\n",
)

### Biased Penalty

Compared to RMSE, RMSLE penalises underestimation more heavily than overestimation.

In [None]:
# Predictions that fall short of every target by the same absolute amount.
y_true = np.array([50, 60, 70])
y_pred = np.subtract(y_true, 10.56)
print(
    f"RMSE under-est.: {rmse(y_true, y_pred):.3f}",
    f"RMSLE under-est.: {rmsle(y_true, y_pred):.3f}",
    sep="\n",
)

print("======")

# Predictions that overshoot every target by that same absolute amount.
y_true = np.array([50, 60, 70])
y_pred = np.add(y_true, 10.56)
print(
    f"RMSE over-est.: {rmse(y_true, y_pred):.3f}",
    f"RMSLE over-est.: {rmsle(y_true, y_pred):.3f}",
    sep="\n",
)