In [5]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle

In [9]:
def computeMSE(yScaled1, yScaled2, scaler=None):
    """Return the squared error between two target arrays, averaged over rows.

    The result is ``sum((y1 - y2) ** 2) / len(y1)``: the sum runs over every
    element of the (broadcast) difference, while the divisor is the length of
    the first axis of ``yScaled1``.

    Args:
        yScaled1: First array of target values.
        yScaled2: Second array of target values. NOTE(review): the subtraction
            broadcasts, so mixing an (N, 1) array with an (N,) array yields an
            (N, N) difference rather than an error -- callers should pass
            matching shapes.
        scaler: Optional object exposing ``inverse_transform``. When given,
            both inputs are mapped through it before the error is computed.

    Returns:
        The summed squared difference divided by ``len(yScaled1)``.
    """
    # Compare against None explicitly: a scaler-like object that happens to be
    # falsy (e.g. defines __len__ or __bool__) must still be applied.
    if scaler is not None:
        yScaled1 = scaler.inverse_transform(yScaled1)
        yScaled2 = scaler.inverse_transform(yScaled2)
    return np.sum((yScaled1 - yScaled2) ** 2) / len(yScaled1)

In [10]:
def computeTrainError(model, X, Y, YScaler):
    """Fit ``model`` on (X, Y) and return its error on that same data.

    Assumes the X and Y inputs are already scaled down.

    Args:
        model: Object exposing ``train(X, Y)`` and ``predict(X)``.
        X: Input features passed to ``model.train`` and ``model.predict``.
        Y: Targets; compared against the predictions reshaped to a single
            column, so presumably of shape (N, 1) -- TODO confirm.
        YScaler: Scaler forwarded to ``computeMSE`` (may be None).

    Returns:
        The value of ``computeMSE(Y, YHat, YScaler)`` for the predictions
        made on the training inputs.
    """
    model.train(X, Y)
    YHat = np.asarray(model.predict(X))
    # Build a column-shaped view instead of assigning to ``.shape``: the array
    # handed back by the model is left untouched, and an output whose size is
    # not N still raises ValueError.
    YHat = YHat.reshape(YHat.shape[0], 1)
    return computeMSE(Y, YHat, YScaler)

In [11]:
def computeTestError(model, X, Y, YScaler, k=10):
    """Estimate the test error of ``model`` with k-fold cross-validation.

    Assumes the X and Y inputs are already scaled down. Both are shuffled
    together, split into ``k`` contiguous folds, and for each fold the model
    is trained on the remaining rows and scored on the held-out rows.

    Args:
        model: Object exposing ``train(X, Y)`` and ``predict(X)``.
        X: 2-D array of inputs, one row per sample (rows are sliced with
            ``[a:b, :]``).
        Y: 2-D array of targets, one row per sample.
        YScaler: Scaler forwarded to ``computeMSE`` (may be None).
        k: Number of partitions for k-fold cross-validation.

    Returns:
        The mean of the per-fold ``computeMSE`` values.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than the number of
            samples; either case would leave a fold (or the fold list) empty
            and make the averaged error NaN.
    """
    N = len(Y)
    if not 1 <= k <= N:
        raise ValueError(
            "k must be between 1 and the number of samples (%d), got %r" % (N, k)
        )

    # Reseed NumPy's global generator so the shuffle below is repeatable
    # across calls. Note this is a global side effect: any later code drawing
    # from np.random also starts from this seed.
    np.random.seed(0)
    X, Y = shuffle(X, Y)

    # Fold boundaries in exact integer arithmetic. The float form
    # int(i / k * N) can truncate one below the intended index (e.g.
    # int(29 / 100 * 100) == 28), which produces empty or uneven folds.
    bounds = [(i * N) // k for i in range(k + 1)]

    test_errors = []
    # Performing k-fold cross-validation.
    for start, stop in zip(bounds[:-1], bounds[1:]):
        XTrain = np.vstack([X[:start, :], X[stop:, :]])
        YTrain = np.vstack([Y[:start, :], Y[stop:, :]])
        XTest = X[start:stop, :]
        YTest = Y[start:stop, :]
        model.train(XTrain, YTrain)
        YHat = np.asarray(model.predict(XTest))
        # Column-shaped view rather than in-place ``.shape`` assignment, so
        # the array returned by the model is not modified.
        YHat = YHat.reshape(YHat.shape[0], 1)
        test_errors.append(computeMSE(YTest, YHat, YScaler))
    return np.mean(test_errors)