In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

c:\Users\Joaquín Amat\Documents\GitHub\skforecast


# Exponential weighted moving average

In [2]:
import numpy as np
from numba import njit
import numba as nb
import pandas as pd


# @njit
# def ewma(
#     x: np.ndarray, alpha: float, adjust: bool = True, ignore_na: bool = False
# ) -> np.ndarray:
#     """
#     Calculate the Exponentially Weighted Moving Average of an xay. This
#     implementation mimics the pandas implementation of the same function.

#     This implementation is based on the work of rjenc29 in the following
#     link: https://github.com/rjenc29/numerical/blob/master/utilities/ewma.py

#     Parameters
#     ----------
#     x : np.xay
#         The xay to calculate the EWMA.
#     alpha : float
#         The decay factor.
#     adjust : bool, optional
#         Adjust the weights for the bias towards zero. The default is False.
#     ignore_na : bool, optional
#         Ignore missing values. The default is False.

#     Returns
#     -------
#     np.xay
#         The EWMA of the
#     """

#     old_wt_factor = 1.0 - alpha
#     new_wt = 1.0 if adjust else alpha

#     n = x.shape[0]
#     output = np.empty(n)

#     weighted_avg = x[0]
#     is_observation = weighted_avg == weighted_avg
#     nobs = int(is_observation)
#     output[0] = weighted_avg if (nobs >= 1) else np.nan
#     old_wt = 1.0

#     for i in range(1, n):
#         cur = x[i]
#         is_observation = cur == cur
#         nobs += int(is_observation)
#         if weighted_avg == weighted_avg:
#             if is_observation or (not ignore_na):
#                 old_wt *= old_wt_factor
#                 if is_observation:
#                     if weighted_avg != cur:
#                         weighted_avg = (
#                             (old_wt * weighted_avg) + (new_wt * cur)
#                         ) / (old_wt + new_wt)
#                     if adjust:
#                         old_wt += new_wt
#                     else:
#                         old_wt = 1.0
#         elif is_observation:
#             weighted_avg = cur

#         output[i] = weighted_avg if (nobs >= 1) else np.nan

#     return output

In [3]:
# Example usage
# rng = np.random.default_rng(42)
# x = rng.random(50)
# series = pd.Series(x)
# alpha = 0.5  # The alpha value for the ewma

# smoothed_x = ewma(x, alpha, adjust=True)
# print(smoothed_x)
# assert np.allclose(smoothed_x, series.ewm(alpha=alpha, adjust=True).mean())


In [4]:
# %%timeit -n 1000 -r 10
# ewma(x, alpha)

In [5]:
# %%timeit -n 1000 -r 10
# series.ewm(alpha=alpha, adjust=True).mean()

# Exponential weighted mean

In [6]:
import numpy as np

@njit  # This tells Numba to use the fastest mode
def ewm_jit(x: np.ndarray, alpha: float):
    """
    Calculate the exponentially weighted mean of an array.

    Parameters
    ----------
    x : numpy.ndarray
        The input array.
    alpha : float
        The decay factor.

    Returns
    -------
    float
        The exponentially weighted mean.


    """
    if not (0 < alpha <= 1):
        raise ValueError("Alpha should be in the range (0, 1].")
    
    n = len(x)
    weights = 0
    sum_weights = 0
    for i in range(n):
        weight = (1 - alpha) ** (n - 1 - i)
        weights += x[i] * weight
        sum_weights += weight

    return weights / sum_weights

In [7]:
def ewm_numpy(x: np.ndarray, alpha: float):
    """
    Calculate the single exponentially weighted mean of an array using NumPy.

    Parameters
    ----------
    x : np.ndarray
        Input array.
    alpha : float
        Smoothing factor (0 < alpha <= 1).

    Returns
    -------
    float
        The exponentially weighted mean of the entire array.
    """
    n = len(x)
    weights = (1 - alpha) ** np.arange(n)[::-1]  # Reverse weights to match the order
    weights /= weights.sum()  # Normalize the weights
    return np.sum(x * weights)

In [8]:
rng = np.random.default_rng(42)
x = rng.random(50)
series = pd.Series(x)
alpha = 0.5  # The alpha value for the ewma

In [9]:
%%timeit 
ewm_jit(x, alpha)

The slowest run took 69.60 times longer than the fastest. This could mean that an intermediate result is being cached.
5.53 μs ± 12 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%%timeit 
ewm_numpy(x, alpha)

8.44 μs ± 339 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [11]:
print(ewm_jit(x, alpha))
print(series.ewm(alpha=alpha, adjust=True).mean().iloc[-1])
assert np.allclose(ewm_jit(x, alpha), series.ewm(alpha=alpha, adjust=True).mean().iloc[-1])

0.34660893251701275
0.34660893251701275


## RollingFeatures

In [12]:
from skforecast.preprocessing import RollingFeatures

y = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
x = np.array([1, 20, 3])

stats = ['min', 'max',  'ewm']
    
rolling = RollingFeatures(
    stats=stats,
    window_sizes=3,
    stats_kwargs={'ewm': {'alpha': 0.5}}
)

rolling.transform(X=x)

TypeError: RollingFeatures.__init__() got an unexpected keyword argument 'stats_kwargs'

In [None]:
rolling.transform_batch(X=y)

Unnamed: 0,roll_min_3,roll_max_3,roll_ewm_3
3,1.0,3.0,2.428571
4,2.0,4.0,3.428571
5,3.0,5.0,4.428571
6,4.0,6.0,5.428571
7,5.0,7.0,6.428571
8,6.0,8.0,7.428571
9,7.0,9.0,8.428571


In [None]:
print(ewm_jit(y.iloc[:3].to_numpy(), 0.3))
print(ewm_jit(y.iloc[1:4].to_numpy(), 0.3))
print(ewm_jit(y.iloc[2:5].to_numpy(), 0.3))
print(ewm_jit(y.iloc[3:6].to_numpy(), 0.3))
print(ewm_jit(y.iloc[4:7].to_numpy(), 0.3))
print(ewm_jit(y.iloc[5:8].to_numpy(), 0.3))
print(ewm_jit(y.iloc[6:9].to_numpy(), 0.3))

2.232876712328767
3.232876712328767
4.232876712328767
5.232876712328768
6.232876712328767
7.232876712328768
8.232876712328768
