In [None]:
# default_exp metrics

In [None]:
#export
import numpy as np
from numba import jit
import pandas as pd
from time_series_model_basics import simulate_data, moving_average, metrics
from typing import List

# Metrics

> Forecasting Metrics.

> Total:  Error, Absolute Error, Squared Error

> Mean: ME, MAE, MSE, RMSE

> Percentage: MPE, MAPE


##  Total Error (TE)

>  $ \sum^{n}_{i} (y_{i} - \hat{y}_{i})$


In [51]:
#export
def __total_error(
    *,
    ts: np.array,
    f: np.array,
) -> float:
    r"""
    Computes the total (signed) error:

    $\sum^{n}_{i}(ts[i] - f[i]), n = len(ts) = len(f)$

    Positions where the time-series or the forecast is nan are ignored.

    ------
    Parameters
    ------

    ts : np.array (or any array-like of numbers) with the time-series
    f :  np.array (or any array-like of numbers) with the forecast

    -------
    Returns
    -------

    float : sum of the non-nan differences (0.0 when there are none)

    """

    # Coerce to float arrays so plain lists/tuples are accepted as well.
    diff = np.asarray(ts, dtype=float) - np.asarray(f, dtype=float)

    # nansum skips the nan entries, i.e. the pairs with a missing value.
    return np.nansum(diff)

In [54]:
# Signed differences are 0.5, -0.5 and 1.0, which add up to exactly 1.
ts = np.array([1, 2, 3])
f = np.array([.5, 2.5, 2])
assert __total_error(ts=ts, f=f) == 1.

# A nan observation makes its difference nan; it must be left out of the sum.
ts = np.array([1, 2, 3, np.nan])
f = np.array([.5, 2.5, 2, 1])
assert __total_error(ts=ts, f=f) == 1

In [55]:
# Demo on simulated data: SMA returns a pair whose first item is the dataframe used below.
# NOTE(review): the 'ma_1' column is presumably created by the window argument 1 — confirm in moving_average.SMA.
df, _ = moving_average.SMA(
    1,
    df=simulate_data.pandas_time_series(),
)

# Total signed error between the 'time_series' and 'ma_1' columns.
# Left as the last expression so the notebook displays the value.
__total_error(ts= df['time_series'].to_numpy(), f=df['ma_1'].to_numpy())

2.795063162712527

## Absolute Error (AE)

>  $ \sum^{n}_{i} \vert y(t_{i}) - \hat{y}(t_{i})\vert $

In [50]:
#export
def __absolute_error(
    *,
    ts: np.array,
    f: np.array,
) -> float:
    r"""
    Computes the total absolute error:

    $\sum^{n}_{i}|ts[i] - f[i]|, n = len(ts) = len(f)$

    Positions where the time-series or the forecast is nan are ignored.

    ------
    Parameters
    ------

    ts : np.array (or any array-like of numbers) with the time-series
    f :  np.array (or any array-like of numbers) with the forecast

    -------
    Returns
    -------

    float : sum of the non-nan absolute differences (0.0 when there are none)

    """

    # Coerce to float arrays so plain lists/tuples are accepted as well.
    abs_diff = np.abs(np.asarray(ts, dtype=float) - np.asarray(f, dtype=float))

    # nansum skips the nan entries, i.e. the pairs with a missing value.
    return np.nansum(abs_diff)

In [58]:
# Absolute differences are 0.5, 0.5 and 1.0, which add up to 2.
ts = np.array([1, 2, 3])
f = np.array([.5, 2.5, 2])
assert __absolute_error(ts=ts, f=f) == 2

# The leading nan observation is skipped even though its forecast is large.
ts = np.array([np.nan, 1, 2, 3])
f = np.array([100, .5, 2.5, 2])
assert __absolute_error(ts=ts, f=f) == 2

## Squared Error (SE)
>  $ \sum^{n}_{i} \vert y(t_{i}) - \hat{y}(t_{i})\vert^2 $

In [57]:
#export
def __squared_error(
    *,
    ts: np.array,
    f: np.array,
) -> float:
    r"""
    Computes the total squared error:

    $\sum^{n}_{i}|ts[i] - f[i]|**2, n = len(ts) = len(f)$

    Positions where the time-series or the forecast is nan are ignored.

    ------
    Parameters
    ------

    ts : np.array (or any array-like of numbers) with the time-series
    f :  np.array (or any array-like of numbers) with the forecast

    -------
    Returns
    -------

    float : sum of the non-nan squared differences (0.0 when there are none)

    """

    # Coerce to float arrays so plain lists/tuples are accepted as well.
    sq_diff = (np.asarray(ts, dtype=float) - np.asarray(f, dtype=float))**2

    # nansum skips the nan entries, i.e. the pairs with a missing value.
    return np.nansum(sq_diff)

In [59]:
# Two differences of magnitude 0.5 and one of magnitude 1, each squared.
ts = np.array([1, 2, 3])
f = np.array([.5, 2.5, 2])
assert __squared_error(ts=ts, f=f) == 2 * (.5)**2 + 1

# The nan observation is skipped despite its huge forecast value.
ts = np.array([1, 2, np.nan, 3])
f = np.array([.5, 2.5, 10**3, 2])
assert __squared_error(ts=ts, f=f) == 2 * (.5)**2 + 1

## Mean Error (ME)
>  $ \frac{1}{n}\sum^{n}_{i} (y_{i} - \hat{y}_{i})$

In [61]:
#export
def __mean_error(
    *,
    ts: np.array,
    f: np.array,
) -> float:
    r"""
    Computes the mean (signed) error:

    $(1/n)\sum^{n*}_{i}(ts[i] - f[i]), n* = len(ts) = len(f)$

    Positions where the time-series or the forecast is nan are ignored.
    Value n is n* minus the ignored values.

    ------
    Parameters
    ------

    ts : np.array (or any array-like of numbers) with the time-series
    f :  np.array (or any array-like of numbers) with the forecast

    -------
    Returns
    -------

    float : mean of the non-nan differences; 0.0 when every pair is ignored

    """
    # Coerce to float arrays so plain lists/tuples are accepted as well.
    diff = np.asarray(ts, dtype=float) - np.asarray(f, dtype=float)
    valid = diff[~np.isnan(diff)]

    # np.divide(..., where=False) without `out` yields uninitialised memory,
    # so the "nothing to average" case is answered explicitly.
    if valid.size == 0:
        return 0.0

    return valid.sum() / valid.size

In [62]:
# Differences sum to 1 over three valid pairs.
ts = np.array([1, 2, 3])
f = np.array([.5, 2.5, 2])
assert __mean_error(ts=ts, f=f) == (1 / 3) * (1)

# A nan forecast removes that pair from both the sum and the count.
ts = np.array([1, 2, 10**3, 3])
f = np.array([.5, 2.5, np.nan, 2])
assert __mean_error(ts=ts, f=f) == (1 / 3) * (1)

# With no valid pair at all the result is expected to be (numerically) zero.
ts = np.array([np.nan, np.nan])
f = np.array([.5, 2.5])
assert abs(__mean_error(ts=ts, f=f)) < 10**(-20)

## Mean Absolute Error (MAE)

>  $ \frac{1}{n}\sum^{n}_{i} \vert y(t_{i}) - \hat{y}(t_{i})\vert $

In [63]:
#export
def __mean_absolute_error(
    *,
    ts: np.array,
    f: np.array,
) -> float:
    r"""
    Computes the mean absolute error:

    $$(1/n)\sum^{n*}_{i} | ts[i] - f[i]|, n* = len(ts) = len(f)$$

    Positions where the time-series or the forecast is nan are ignored.
    Value n is n* minus the ignored values.

    ------
    Parameters
    ------

    ts : np.array (or any array-like of numbers) with the time-series
    f :  np.array (or any array-like of numbers) with the forecast

    -------
    Returns
    -------

    float : mean of the non-nan absolute differences; 0.0 when every pair
            is ignored

    """

    # Coerce to float arrays so plain lists/tuples are accepted as well.
    abs_diff = np.abs(np.asarray(ts, dtype=float) - np.asarray(f, dtype=float))
    valid = abs_diff[~np.isnan(abs_diff)]

    # np.divide(..., where=False) without `out` yields uninitialised memory,
    # so the "nothing to average" case is answered explicitly.
    if valid.size == 0:
        return 0.0

    return valid.sum() / valid.size

In [64]:
# Absolute differences sum to 2 over three valid pairs.
ts = np.array([1, 2, 3])
f = np.array([.5, 2.5, 2])
assert __mean_absolute_error(ts=ts, f=f) == (1 / 3)*(2)

# With no valid pair at all the result is expected to be (numerically) zero.
ts = np.array([np.nan, np.nan])
f = np.array([.5, 2.5])
assert abs(__mean_absolute_error(ts=ts, f=f)) < 10**(-20)

## Mean Squared Error (MSE)

> $\frac{1}{n} \sum^{n}_{i}(y_{i} - \hat{y}_{i})^{2}$

In [66]:
#export
def __mean_squared_error(
    *,
    ts: np.array,
    f: np.array,
) -> float:
    r"""
    Computes the mean squared error:

    $$(1/n)\sum^{n*}_{i} | ts[i] - f[i]|**2, n* = len(ts) = len(f)$$

    Positions where the time-series or the forecast is nan are ignored.
    Value n is n* minus the ignored values.

    ------
    Parameters
    ------

    ts : np.array (or any array-like of numbers) with the time-series
    f :  np.array (or any array-like of numbers) with the forecast

    -------
    Returns
    -------

    float : mean of the non-nan squared differences; 0.0 when every pair
            is ignored

    """
    # Coerce to float arrays so plain lists/tuples are accepted as well.
    sq_diff = (np.asarray(ts, dtype=float) - np.asarray(f, dtype=float))**2
    valid = sq_diff[~np.isnan(sq_diff)]

    # np.divide(..., where=False) without `out` yields uninitialised memory,
    # so the "nothing to average" case is answered explicitly.
    if valid.size == 0:
        return 0.0

    return valid.sum() / valid.size

In [67]:
# Squared differences are 0.25, 0.25 and 1, averaged over three pairs.
ts = np.array([1, 2, 3])
f = np.array([.5, 2.5, 2])
assert __mean_squared_error(ts=ts, f=f) == (1 / 3) * (2 * .5**2 + 1)

# With no valid pair at all the result is expected to be (numerically) zero.
ts = np.array([np.nan, np.nan])
f = np.array([.5, 2.5])
assert __mean_squared_error(ts=ts, f=f) < 10**(-20)

## Root Mean Square Error (RMSE)

> $\sqrt{\frac{1}{n} \sum^{n}_{i}(y_{i} - \hat{y}_{i})^{2}}$

In [73]:
#export
def __root_mean_square_error(
    *,
    ts: np.array,
    f: np.array,
) -> float:
    r"""
    Computes the root mean square error, i.e. the square root of the
    mean squared error:

    $$\sqrt{(1/n)\sum^{n}_{i} | ts[i] - f[i]|**2},  n = len(ts) = len(f)$$

    nan handling is whatever __mean_squared_error does, since the inputs
    are passed to it unchanged.

    ------
    Parameters
    ------

    ts : np.array with the time-series
    f :  np.array with the forecast

    -------
    Returns
    -------

    float : square root of __mean_squared_error(ts=ts, f=f)

    """

    mse = __mean_squared_error(ts=ts, f=f)
    return np.sqrt(mse)

In [74]:
# Square root of the mean of the squared differences 0.25, 0.25 and 1.
ts = np.array([1, 2, 3])
f = np.array([.5, 2.5, 2])
assert __root_mean_square_error(ts=ts, f=f) == np.sqrt((1 / 3) * (2 * .5**2 + 1))

# With no valid pair at all the result is expected to be (numerically) zero.
ts = np.array([np.nan, np.nan])
f = np.array([.5, 2.5])
assert __root_mean_square_error(ts=ts, f=f) < 10**(-20)

## Mean Percentage Error (MPE)
>  $\frac{1}{n} \sum^{n}_{i}\frac{y_{i} - \hat{y}_{i}} { y_i} $


In [75]:
#export
def __mean_percentage_error(
    *,
    ts: np.array,
    f: np.array,
) -> float:
    r"""
    Computes the mean percentage error:

    $$ (1/n)\sum ( ts[i] - f[i]) /ts[i],  n* = len(ts) = len(f)$$

    Positions with a nan value or with ts[i] == 0 (division by zero)
    are ignored. n is n* minus the ignored values.

    ------
    Parameters
    ------

    ts : np.array (or any array-like of numbers) with the time-series
    f :  np.array (or any array-like of numbers) with the forecast

    -------
    Returns
    -------

    float : mean of the valid relative errors; 0.0 when every pair is ignored

    """

    # Coerce to float arrays so plain lists/tuples are accepted as well.
    actual = np.asarray(ts, dtype=float)
    diff = actual - np.asarray(f, dtype=float)

    # Pre-fill with nan: where ts == 0 np.divide leaves `out` untouched, so
    # those slots must already hold nan to be dropped below (without `out`
    # they would contain uninitialised memory and pollute the mean).
    pe = np.full(diff.shape, np.nan)
    np.divide(diff, actual, out=pe, where=actual != 0)

    valid = pe[~np.isnan(pe)]
    if valid.size == 0:
        return 0.0

    return valid.sum() / valid.size

In [78]:
# Relative errors are 0.5, -0.25 and 1/3, averaged over three pairs.
ts = np.array([1, 2, 3])
f = np.array([.5, 2.5, 2])
assert __mean_percentage_error(ts=ts, f=f) == (.5 / 2 + 1 / 3) * (1 / 3)

# With no valid pair at all the result is expected to be (numerically) zero.
ts = np.array([np.nan, np.nan])
f = np.array([.5, 2.5])
assert __mean_percentage_error(ts=ts, f=f) < 10**(-20)

## Mean Absolute Percent Error (MAPE)


>  $\frac{1}{n} \sum^{n}_{i}\vert \frac{y_{i} - \hat{y}_{i}} { y_i} \vert $

In [79]:
#export
def __mean_absolute_percent_error(
    *,
    ts: np.array,
    f: np.array,
) -> float:
    r"""
    Computes the mean absolute percentage error:

    $$ (1/n)\sum | (ts[i] - f[i]) / ts[i] |,  n* = len(ts) = len(f)$$

    Positions with a nan value or with ts[i] == 0 (division by zero)
    are ignored. n is n* minus the ignored values.

    ------
    Parameters
    ------

    ts : np.array (or any array-like of numbers) with the time-series
    f :  np.array (or any array-like of numbers) with the forecast

    -------
    Returns
    -------

    float : mean of the valid absolute relative errors; 0.0 when every pair
            is ignored

    """

    # Coerce to float arrays so plain lists/tuples are accepted as well.
    actual = np.asarray(ts, dtype=float)
    diff = actual - np.asarray(f, dtype=float)

    # Pre-fill with nan: where ts == 0 np.divide leaves `out` untouched, so
    # those slots must already hold nan to be dropped below (without `out`
    # they would contain uninitialised memory and pollute the mean).
    pe = np.full(diff.shape, np.nan)
    np.divide(diff, actual, out=pe, where=actual != 0)
    ape = np.abs(pe)

    valid = ape[~np.isnan(ape)]
    if valid.size == 0:
        return 0.0

    return valid.sum() / valid.size

In [82]:
# Absolute relative errors are 0.5, 0.25 and 1/3, averaged over three pairs.
ts, f = np.array([1, 2, 3]), np.array([.5, 2.5, 2])
assert __mean_absolute_percent_error(ts=ts,
                                     f=f) == np.divide(.5 * (3. / 2) + (1. / 3),
                                                       3)

# With no valid pair at all the result is expected to be (numerically) zero.
# This check previously exercised __mean_percentage_error by mistake.
ts, f = np.array([np.nan, np.nan]), np.array([.5, 2.5])
assert __mean_absolute_percent_error(ts=ts, f=f) < 10**(-20)

## Summary

> Summary of metrics 

In [88]:
#export
def SUMMARY(
    df: pd.DataFrame = None,
    *,
    val_col: str = None,
    pred_cols: List[str] = None,
) -> pd.DataFrame:
    """
    Summary of Prediction Metrics

    Applies every metric function whose name starts with '__' and is listed
    in metrics.__all__ to each prediction column.

    -----
    Parameters
    -----

    df : dataframe, default None. If None, a simulated dataframe is generated
         and val_col is set to 'time_series'.
    val_col : str, default None. Name of the column with the actual values.
              It must be provided when a dataframe is provided.
    pred_cols : List[str], default None. Names of the columns with the
              predictions for the values. If not provided, all dataframe
              columns except val_col are used.

    -----
    Returns
    ------

    dataframe : one column per prediction column and one row per metric;
                the index is named 'Error'.

    ------
    Raises
    ------

    ValueError : when a dataframe is provided but val_col is None.

    """
    if df is None:
        # No data supplied: simulate a series and add moving-average columns.
        # NOTE(review): 1 and 4 are presumably the SMA window sizes — confirm
        # against moving_average.SMA.
        df, _ = moving_average.SMA(
            1,
            4,
            df=simulate_data.pandas_time_series(),
        )

        val_col = 'time_series'

    if val_col is None:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError('When df is provided val_col cannot be None')

    if pred_cols is None:
        pred_cols = [x for x in df.columns if x != val_col]

    actual_values = df[val_col].to_numpy()

    # Resolve the metric functions once, by direct lookup in this module's
    # namespace instead of eval(). The row label drops the leading '__' and
    # the trailing word, e.g. '__mean_squared_error' -> 'mean squared'.
    namespace = globals()
    metric_fns = {
        ' '.join(fn[2:].split('_')[:-1]): namespace[fn]
        for fn in metrics.__all__ if fn.startswith('__')
    }

    columns = {}
    for col in pred_cols:
        forecast = df[col].to_numpy()
        columns[col] = pd.Series({
            label: metric(ts=actual_values, f=forecast)
            for label, metric in metric_fns.items()
        })

    result_df = pd.DataFrame(columns)
    result_df.index.name = 'Error'
    return result_df

In [89]:
# Called without a dataframe, SUMMARY generates its own simulated data and moving-average columns.
SUMMARY()

Unnamed: 0_level_0,ma_1,ma_4
Error,Unnamed: 1_level_1,Unnamed: 2_level_1
total,5.014113,7.783203
absolute,21.970438,22.912183
squared,25.868284,30.116424
mean,0.1729,0.299354
mean absolute,0.757601,0.881238
mean squared,0.89201,1.158324
root mean square,0.944463,1.076255
mean percentage,0.016861,0.027478
mean absolute percent,0.127818,0.144065


## Dataframes and Figures

> Generates a Time Series Dataframe and a Figure Object

> The values of the time series are simulated

> Includes Forecasting with Moving Averages 