In [2]:
from skmisc.loess import loess, loess_outputs
from sklearn.cluster import KMeans
import numpy as np
import csv
from scipy.interpolate import splev, interp1d

In [3]:
# Read the first two fields of every CSV row as text (header row included).
with open('Coding3_Data.csv', newline='') as csvfile:
    columns = [(row[0], row[1]) for row in csv.reader(csvfile, delimiter=',')]

# Skip the header entry, parse the rest as float64, and stack the two columns
# so that data[0] is the first CSV column and data[1] the second.
body = columns[1:]
data = np.asarray([
    np.array([first for first, _ in body], dtype='float64'),
    np.array([second for _, second in body], dtype='float64'),
])

In [4]:
def lcv(y, y_hat, S):
    """Leave-one-out style CV score: mean of ((y - y_hat) / (1 - S)) squared.

    Parameters
    ----------
    y : array-like
        Observed responses.
    y_hat : array-like
        Fitted values, same length as ``y``.
    S : array-like
        Per-observation values subtracted from 1 in the denominator (the
        caller passes the diagonal reported by the fitted smoother).

    Returns
    -------
    The sum of squared scaled residuals divided by ``len(y)``.
    """
    y = np.asarray(y, dtype=float)
    y_hat = np.asarray(y_hat, dtype=float)
    S = np.asarray(S, dtype=float)

    scaled_residuals = (y - y_hat) / (1 - S)
    # The sample size comes from the argument itself rather than from a
    # notebook-level variable, so the result does not depend on kernel state.
    return np.sum(np.square(scaled_residuals), axis=0) / len(y)

In [5]:
def gcv(y, y_hat, S):
    """Generalized CV score: residuals scaled by (1 - mean of S), squared and averaged.

    Parameters
    ----------
    y : array-like
        Observed responses.
    y_hat : array-like
        Fitted values, same length as ``y``.
    S : array-like
        Per-observation values whose average replaces the individual entries
        in the denominator (the caller passes the diagonal reported by the
        fitted smoother).

    Returns
    -------
    The sum of squared scaled residuals divided by ``len(y)``.
    """
    y = np.asarray(y, dtype=float)
    y_hat = np.asarray(y_hat, dtype=float)
    S = np.asarray(S, dtype=float)

    # The sample size comes from the argument itself rather than from a
    # notebook-level variable, so the result does not depend on kernel state.
    n_obs = len(y)
    mean_S = np.sum(S, axis=0) / n_obs
    scaled_residuals = (y - y_hat) / (1 - mean_S)
    return np.sum(np.square(scaled_residuals), axis=0) / n_obs


In [6]:
# Candidate spans from 0.2 in steps of 0.05, stopping before 0.95. The grid is
# built with a floating-point step, so entries can carry rounding artefacts
# (e.g. 0.49999999999999994).
spans = np.arange(0.2, 0.95, 0.05)
x = data[0]
y = data[1]

# One LCV and one GCV score per candidate span.
lcvs = np.empty(len(spans))
gcvs = np.empty(len(spans))
for idx, span in enumerate(spans):
    # Fit a loess model with the current span.
    loess_model = loess(x, y)
    loess_model.model.span = span
    loess_model.fit()

    S = loess_model.outputs.diagonal
    y_hat = loess_model.outputs.fitted_values

    lcvs[idx] = lcv(y, y_hat, S)
    gcvs[idx] = gcv(y, y_hat, S)

# The preferred span under each criterion is the one with the smallest score.
best_span_lcv = spans[lcvs.argmin()]
best_span_gcv = spans[gcvs.argmin()]

In [7]:
# Report the span selected by each criterion.
for label, best in (
    ('Best span chosen by LCV: ', best_span_lcv),
    ('Best span chosen by GCV: ', best_span_gcv),
):
    print(label, best)

Best span chosen by LCV:  0.49999999999999994
Best span chosen by GCV:  0.49999999999999994


Part II

In [27]:
# Load columns 1..52 of every data row (the header row is skipped). The file is
# opened in a `with` block so the handle is closed as soon as parsing finishes.
with open("Sales_Transactions_Dataset_Weekly.csv", "rb") as sales_file:
    data = np.loadtxt(sales_file, delimiter=",", skiprows=1, usecols=range(1, 53))

# Centre every row on its own mean.
x = data - np.mean(data, axis=1, keepdims=True)
x

array([[ 1.36538462,  2.36538462,  0.36538462, ..., -3.63461538,
        -4.63461538,  0.36538462],
       [ 3.01923077,  2.01923077, -0.98076923, ..., -2.98076923,
         2.01923077, -3.98076923],
       [-1.69230769,  2.30769231, -0.69230769, ..., -0.69230769,
        -0.69230769, -1.69230769],
       ...,
       [ 0.5       , -0.5       , -0.5       , ..., -0.5       ,
         3.5       ,  2.5       ],
       [-0.32692308, -0.32692308, -0.32692308, ..., -0.32692308,
         1.67307692, -0.32692308],
       [-0.30769231,  0.69230769, -0.30769231, ..., -0.30769231,
        -0.30769231,  0.69230769]])

In [28]:
def _bspline_design(points, knots, degree, der=0):
    """Evaluate every B-spline basis function (or its der-th derivative) at points.

    Returns an array of shape (len(points), n_bases), where
    n_bases = len(knots) - (degree + 1).
    """
    points = np.atleast_1d(np.asarray(points, dtype=float))
    n_bases = len(knots) - (degree + 1)
    design = np.empty((points.shape[0], n_bases), dtype=float)
    for i in range(n_bases):
        # A unit coefficient vector isolates the i-th basis function.
        coefs = np.zeros((n_bases,))
        coefs[i] = 1
        design[:, i] = splev(points, (knots, coefs, degree), der=der)
    return design


def _linear_extension(points, pivot, knots, degree):
    """Continue the basis past a boundary knot along its tangent line at pivot."""
    value = _bspline_design(pivot, knots, degree, der=0)
    slope = _bspline_design(pivot, knots, degree, der=1)
    return value + (points - pivot)[:, np.newaxis] * slope


# converted from R's ns()
def ns(x, df=None, knots=None, boundary_knots=None, include_intercept=False):
    """Build a natural cubic spline basis matrix for ``x``.

    Parameters
    ----------
    x : array-like, 1-D
        Points at which the basis is evaluated.
    df : int, optional
        Degrees of freedom. When given and ``df - 1 - include_intercept`` is
        positive, that many interior knots are placed at equally spaced
        quantiles of the points lying within the boundary knots, replacing
        any ``knots`` argument.
    knots : array-like, optional
        Interior knots. ``None`` means no interior knots.
    boundary_knots : array-like of length 2, optional
        Boundary knots; defaults to ``[min(x), max(x)]``. Points outside them
        are handled by linear extension from the nearest boundary knot.
    include_intercept : bool, default False
        When falsy, the first B-spline basis column is dropped before the
        boundary constraints are applied.

    Returns
    -------
    numpy.ndarray
        Basis matrix with one row per element of ``x``.
    """
    degree = 3
    x = np.asarray(x, dtype=float)

    if boundary_knots is None:
        boundary_knots = [np.min(x), np.max(x)]
    else:
        boundary_knots = np.sort(boundary_knots).tolist()

    oleft = x < boundary_knots[0]
    oright = x > boundary_knots[1]
    inside = ~(oleft | oright)

    if df is not None:
        n_interior = max(df - 1 - include_intercept, 0)
        if n_interior > 0:
            probs = np.linspace(0, 1, num=n_interior + 2)[1:-1]
            knots = np.quantile(x[inside], probs)

    # No interior knots requested (df too small, or neither df nor knots
    # given): fall back to an empty knot set instead of concatenating None.
    if knots is None:
        knots = []

    # Each boundary knot is repeated degree + 1 times in the full knot vector.
    Aknots = np.sort(np.concatenate((np.repeat(boundary_knots, degree + 1), knots)))
    n_bases = len(Aknots) - (degree + 1)

    basis = np.empty((x.shape[0], n_bases), dtype=float)
    if inside.any():
        basis[inside, :] = _bspline_design(x[inside], Aknots, degree)
    if oleft.any():
        basis[oleft, :] = _linear_extension(x[oleft], boundary_knots[0], Aknots, degree)
    if oright.any():
        basis[oright, :] = _linear_extension(x[oright], boundary_knots[1], Aknots, degree)

    # Second derivatives of every basis function at the two boundary knots.
    const = _bspline_design(boundary_knots, Aknots, degree, der=2)

    # Truthiness test (not identity) so that 0 / numpy booleans behave like False.
    if not include_intercept:
        basis = basis[:, 1:]
        const = const[:, 1:]

    # Rotate the basis with the complete Q factor of const.T and discard the
    # first two columns, leaving combinations whose second derivative
    # vanishes at both boundary knots.
    qr_const = np.linalg.qr(const.T, mode='complete')[0]
    return (basis @ qr_const)[:, 2:]

In [29]:
# NOTE(review): the spline basis is evaluated at the values of row 3 of `x`.
# If a time index was meant to be the spline input, this needs changing —
# confirm the intended input.
F = ns(x[3, :], df=10)
# Drop the first basis column.
F = F[:, 1:]

# Least-squares coefficients of every row of x on the columns of F, obtained by
# solving the normal equations (F^T F) B^T = F^T x^T directly rather than
# forming an explicit matrix inverse.
BT = np.linalg.solve(F.T @ F, F.T @ x.T)
B = BT.T
np.shape(B)

(811, 9)