In [1]:
import cupy as cp

In [2]:
import numpy as np
from scipy import linalg
from statsmodels.nonparametric.smoothers_lowess import lowess
import math

In [3]:
import numpy as np
import pandas as pd
from pandas.core.nanops import nanmean as pd_nanmean
from statsmodels.tsa.seasonal import DecomposeResult
# from statsmodels.tsa.filters._utils import _maybe_get_pandas_wrapper_freq
import statsmodels.api as sm

In [4]:
from math import ceil

In [5]:
def lowess2(x, y, f=2. / 3., iter=3):
    """Robust locally weighted linear regression (LOWESS) smoother.

    For every sample ``x[i]`` a straight line is fitted by weighted least
    squares and evaluated at ``x[i]``.  The weights are tricube kernel weights
    that fall to zero at the distance of the ``r``-th nearest neighbour of
    ``x[i]`` (``r = ceil(f * n)``), multiplied by bisquare robustness weights
    derived from the residuals of the previous pass.

    Parameters
    ----------
    x, y : array_like, shape (n,)
        Coordinates of the scatter points.  Both are converted to float
        arrays, so lists are accepted as well as NumPy arrays.
    f : float, optional
        Smoothing span: the fraction of the points that take part in each
        local fit.  Larger values give a smoother curve.
    iter : int, optional
        Number of fitting passes.  Every pass after the first re-weights the
        points using the residuals of the previous one.

    Returns
    -------
    numpy.ndarray, shape (n,)
        Smoothed estimate of ``y`` at each ``x``.

    Raises
    ------
    numpy.linalg.LinAlgError
        Raised by ``scipy.linalg.solve`` when a local 2x2 system is singular
        (fewer than two distinct points carry a non-zero weight).

    Notes
    -----
    The kernel weights are stored as a dense ``n x n`` matrix, so memory use
    grows quadratically with the number of points.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(x)
    if n < 2:
        # A line cannot be fitted through fewer than two points; there is
        # nothing to smooth, so hand the input back unchanged.
        return y.copy()

    # r is used as a 0-based index into n sorted distances.  Without the clamp
    # f = 1 gives r == n, which is one past the end.
    r = min(int(ceil(f * n)), n - 1)

    dist = np.abs(x[:, None] - x[None, :])
    # Per-point bandwidth: distance to the r-th nearest neighbour.
    h = np.sort(dist, axis=1)[:, r]
    # h broadcasts over the last axis, so column i is scaled by h[i].
    w = np.clip(dist / h, 0.0, 1.0)
    w = (1 - w ** 3) ** 3

    yest = np.zeros(n)
    delta = np.ones(n)
    for _ in range(iter):
        for i in range(n):
            weights = delta * w[:, i]
            # Normal equations of the weighted straight-line fit around x[i].
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]

        residuals = y - yest
        s = np.median(np.abs(residuals))
        if s == 0:
            # The fit is already exact for at least half of the points.
            # Dividing by a zero scale would turn every robustness weight
            # into NaN, so stop re-weighting here.
            break
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest


In [6]:
def lowess_cp(x, y, f=2. / 3., iter=3):
    """Memory-lean robust LOWESS smoother (locally weighted linear regression).

    For every sample ``x[i]`` a straight line is fitted by weighted least
    squares and evaluated at ``x[i]``.  The weights are tricube kernel weights
    that fall to zero at the distance of the ``r``-th nearest neighbour of
    ``x[i]`` (``r = ceil(f * n)``), multiplied by bisquare robustness weights
    derived from the residuals of the previous pass.

    The tricube weights for point ``i`` are rebuilt inside the loop instead of
    being stored as an ``n x n`` matrix, so memory use is linear in ``n``.
    The price is that they are recomputed on every robustness pass.

    Despite the ``_cp`` suffix, this function uses only NumPy and SciPy.

    Parameters
    ----------
    x, y : array_like, shape (n,)
        Coordinates of the scatter points.  Both are converted to float
        NumPy arrays.
    f : float, optional
        Smoothing span: the fraction of the points that take part in each
        local fit.  Larger values give a smoother curve.
    iter : int, optional
        Number of fitting passes.  Every pass after the first re-weights the
        points using the residuals of the previous one.

    Returns
    -------
    numpy.ndarray, shape (n,)
        Smoothed estimate of ``y`` at each ``x``.

    Raises
    ------
    numpy.linalg.LinAlgError
        Raised by ``scipy.linalg.solve`` when a local 2x2 system is singular
        (fewer than two distinct points carry a non-zero weight).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n = len(x)
    if n < 2:
        # A line cannot be fitted through fewer than two points; there is
        # nothing to smooth, so hand the input back unchanged.
        return y.copy()

    # r is used as a 0-based index into n sorted distances.  Without the clamp
    # f = 1 gives r == n, which is one past the end.
    r = min(int(ceil(f * n)), n - 1)

    # Per-point bandwidth: distance to the r-th nearest neighbour.
    # np.partition puts the r-th smallest value at index r without paying for
    # a full sort of every distance vector.
    h = np.array([np.partition(np.abs(x - x[i]), r)[r] for i in range(n)])

    yest = np.zeros(n)
    delta = np.ones(n)
    for _ in range(iter):
        for i in range(n):
            # Tricube kernel centred on x[i], computed on demand.
            u = np.clip(np.abs(x - x[i]) / h[i], 0.0, 1.0)
            weights = delta * (1 - u ** 3) ** 3
            # Normal equations of the weighted straight-line fit around x[i].
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]

        residuals = y - yest
        s = np.median(np.abs(residuals))
        if s == 0:
            # The fit is already exact for at least half of the points.
            # Dividing by a zero scale would turn every robustness weight
            # into NaN, so stop re-weighting here.
            break
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2

    return yest


In [70]:

# n = 100
# x = np.linspace(0, 2 * math.pi, n)
# y = np.sin(x) + 0.3 * np.random.randn(n)

# f = 0.25
# yest = lowess2(x, y, f=f, iter=3)
# yest2 = lowess(y, x, frac=f, it=3)

In [7]:
# Order of the differencing applied to the target series.
n_diff = 1

# Semicolon-separated CSV; its first column becomes the frame's index.
df = pd.read_csv('data/ppnet_metar_v7.csv', index_col=0, sep=';')

# Target column on one side, every other column on the other.
y = df['Consumption']
X = df.drop(columns='Consumption')

# diff() leaves the first n_diff entries undefined; dropna() removes the NaNs.
y_diff = y.diff(n_diff).dropna()
y_diff_index = y_diff.index

In [8]:
y_diff

Datetime
2013-01-01 01:00:00     2940.0
2013-01-01 02:00:00     1864.0
2013-01-01 03:00:00     1048.0
2013-01-01 04:00:00     7740.0
2013-01-01 05:00:00    21576.0
                        ...   
2019-06-30 19:00:00     1302.0
2019-06-30 20:00:00    -1005.0
2019-06-30 21:00:00      918.0
2019-06-30 22:00:00    -2950.0
2019-06-30 23:00:00    -4319.0
Name: Consumption, Length: 56927, dtype: float64

In [9]:
# Differenced target as a plain NumPy array.
y = y_diff.values
# Integer positions 0..len(y)-1 serve as the x axis.  np.arange builds them
# directly instead of going through a Python list.
x = np.arange(len(y))
# Smoothing span: fraction of the points used in each local fit.
f = 0.25

In [77]:
# yest = lowess2(x, y, f=f, iter=3)

In [80]:
# yest2 = lowess(y, x, frac=f, it=3)

In [81]:
# yest2[:, 1]

array([ -767.81370977,  -767.80717149,  -767.80064322, ...,
       -1135.2570173 , -1135.30611808, -1135.35521101])

In [10]:
# Differenced target as a plain NumPy array.
y = y_diff.values
# Integer positions 0..len(y)-1 serve as the x axis.  np.arange builds them
# directly instead of going through a Python list.
x = np.arange(len(y))
# Smoothing span: fraction of the points used in each local fit.
f = 0.25
# Number of fitting passes.  NOTE: this name shadows the builtin iter() for
# the rest of the session; it is kept because the fitting loop reads it.
iter = 3

In [11]:
# Rebind y and x to CuPy copies of the arrays.  From this cell on the two
# names refer to CuPy arrays, not the NumPy arrays built earlier.
y = cp.array(y)
x = cp.array(x)

In [15]:
n = x.shape[0]
# f * n is an ordinary Python float, so round it up on the host with math.ceil
# rather than through a CuPy call.  r is later used as a 0-based index into n
# sorted distances, hence the clamp to n - 1.
r = min(int(ceil(f * n)), n - 1)

In [18]:
r

14232

In [13]:
cp.abs(x - x[0])

array([    0,     1,     2, ..., 56924, 56925, 56926])

In [17]:
n

56927

In [24]:
(x[:, None] - x[None, :])

OutOfMemoryError: Out of memory allocating 25,925,467,136 bytes (allocated so far: 2,857,984 bytes).

In [25]:
x[:, None]

array([[    0],
       [    1],
       [    2],
       ...,
       [56924],
       [56925],
       [56926]])

In [27]:
x[None, :]

array([[    0,     1,     2, ..., 56924, 56925, 56926]])

In [26]:
x

array([    0,     1,     2, ..., 56924, 56925, 56926])

In [30]:
cp.abs((x - x))

array([0, 0, 0, ..., 0, 0, 0])

In [21]:
cp.clip(cp.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)

OutOfMemoryError: Out of memory allocating 25,925,467,136 bytes (allocated so far: 4,349,440 bytes).

In [34]:
n = x.shape[0]
# f * n is an ordinary Python float, so round it up on the host with math.ceil
# rather than through a CuPy call.  r indexes n sorted distances (0-based),
# hence the clamp to n - 1 (f = 1 would otherwise give r == n).
r = min(int(ceil(f * n)), n - 1)
# h[i] is the bandwidth of point i: the distance to its r-th nearest neighbour.
h = cp.zeros(n)
for i in range(n):
    abs_value = cp.abs(x - x[i])
    abs_value_sorted = cp.sort(abs_value)
    ret_value = abs_value_sorted[r]
    h[i] = ret_value
# Placeholder kernel weights: a 1-D vector of ones, not the n x n tricube
# matrix (which could not be allocated on the GPU for this n).
w = cp.ones(n)
# Smoothed estimates and robustness weights, one entry per point.
yest = cp.zeros(n)
delta = cp.ones(n)



In [37]:
n = x.shape[0]
# f * n is an ordinary Python float, so round it up on the host with math.ceil
# rather than through a CuPy call.  r indexes n sorted distances (0-based),
# hence the clamp to n - 1 (f = 1 would otherwise give r == n).
r = min(int(ceil(f * n)), n - 1)
# h[i] is the bandwidth of point i: the distance to its r-th nearest neighbour.
h = cp.zeros(n)
for i in range(n):
    h[i] = cp.sort(cp.abs(x - x[i]))[r]
# Placeholder kernel weights: a 1-D vector of ones, not the n x n tricube
# matrix (which could not be allocated on the GPU for this n).
w = cp.ones(n)
# Smoothed estimates and robustness weights, one entry per point.
yest = cp.zeros(n)
delta = cp.ones(n)

In [None]:
# Robust LOWESS passes over the CuPy arrays x, y, h, yest and delta.
# Everything stays in CuPy: the NumPy/SciPy host routines (np.array,
# scipy.linalg.solve) do not accept CuPy arrays.
for iteration in range(iter):
    for i in range(n):
        # Tricube kernel centred on x[i], built on demand.  A precomputed
        # n x n kernel matrix could not be allocated on the GPU for this n.
        u = cp.clip(cp.abs(x - x[i]) / h[i], 0.0, 1.0)
        weights = delta * (1 - u ** 3) ** 3
        # Normal equations of the weighted straight-line fit around x[i].
        b = cp.stack([cp.sum(weights * y), cp.sum(weights * y * x)])
        A = cp.stack([cp.stack([cp.sum(weights), cp.sum(weights * x)]),
                      cp.stack([cp.sum(weights * x), cp.sum(weights * x * x)])])
        beta = cp.linalg.solve(A, b)
        yest[i] = beta[0] + beta[1] * x[i]

    residuals = y - yest
    s = cp.median(cp.abs(residuals))
    if float(s) == 0.0:
        # A zero scale would turn every robustness weight into NaN; the fit
        # is already exact for at least half of the points, so stop here.
        break
    delta = cp.clip(residuals / (6.0 * s), -1, 1)
    delta = (1 - delta ** 2) ** 2

In [1]:
n = len(x)

NameError: name 'x' is not defined

In [4]:
# Tunable parameters; presumably the seasonal period and the LOWESS
# frac/delta settings for a decomposition step - TODO confirm against the
# cell that consumes them.
period, lo_frac, lo_delta = 365, 0.6, 0.01

In [5]:
# View df as an array and drop every length-1 axis.
observed = np.squeeze(np.asanyarray(df))

NameError: name 'df' is not defined