In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from preprocessing_utils import *

In [2]:
def computeMSE(yScaled1, yScaled2, scaler=None):
    """Mean squared error between two sets of values.

    Input
    yScaled1: array-like of reference values (list or ndarray)
    yScaled2: array-like of values to compare against yScaled1
    scaler: optional object exposing ``inverse_transform``; when given, both
        inputs are mapped through it before the error is computed

    Returns
    Sum of squared differences divided by ``len(yScaled1)`` (the number of
    entries along the first axis).
    """
    reference = np.asarray(yScaled1)
    estimate = np.asarray(yScaled2)

    # A column (N, 1) compared with a flat (N,) vector would silently
    # broadcast to an (N, N) matrix and inflate the error. When the two
    # inputs only differ by singleton axes, pair them element by element.
    if (reference.shape != estimate.shape
            and np.squeeze(reference).shape == np.squeeze(estimate).shape):
        estimate = estimate.reshape(reference.shape)

    # Test against None explicitly so a scaler object that happens to be
    # falsy (e.g. defines __len__) is still applied.
    if scaler is not None:
        reference = np.asarray(scaler.inverse_transform(reference))
        estimate = np.asarray(scaler.inverse_transform(estimate))

    return np.sum((reference - estimate) ** 2) / len(reference)

In [3]:
def computeAccuracy(Y,Yhat):
    """Fraction of entries in ``Yhat`` that equal the matching entry in ``Y``.

    Input
    Y: array-like of true labels (list or ndarray)
    Yhat: array-like of predicted labels

    Returns
    Number of matches along the first axis divided by ``len(Y)``. For 1-D
    input this is a scalar; for 2-D input it is one value per column.

    Raises
    ValueError if ``Y`` is empty.
    """
    truth = np.asarray(Y)
    predicted = np.asarray(Yhat)

    if len(truth) == 0:
        raise ValueError("computeAccuracy requires at least one label")

    # A column (N, 1) compared with a flat (N,) vector would broadcast to an
    # (N, N) comparison; pair the entries element by element instead.
    if (truth.shape != predicted.shape
            and np.squeeze(truth).shape == np.squeeze(predicted).shape):
        predicted = predicted.reshape(truth.shape)

    # Summing over axis 0 mirrors iterating over rows, and also works when
    # the inputs were given as plain Python lists.
    return np.sum(truth == predicted, axis=0) / len(truth)

In [4]:
# Assumes X, Y inputs are already scaled down.
def computeTrainError(model, X, Y, YScaler):
    """Fit ``model`` on (X, Y) and return its MSE on that same data.

    Input
    model: object exposing ``train(X, Y)`` and ``predict(X)``
    X: training inputs passed to the model unchanged
    Y: training targets; compared against the predictions as a column
    YScaler: scaler forwarded to computeMSE (may be None)

    Returns
    The value computeMSE reports for Y versus the model's predictions on X.
    """
    model.train(X, Y)
    predictions = np.asarray(model.predict(X))
    # Build a column view with reshape rather than assigning to ``.shape``:
    # in-place shape assignment mutates the array object the model handed
    # back and fails outright on non-contiguous arrays.
    predictions = predictions.reshape(predictions.shape[0], 1)
    return computeMSE(Y, predictions, YScaler)

In [5]:
def computeTrainAccuracy(model, X, Y):
    """Fit ``model`` on (X, Y) and score its predictions on that same data.

    ``model`` must expose ``train(X, Y)`` and ``predict(X)``. The result is
    whatever computeAccuracy returns for Y versus the predictions.
    """
    model.train(X, Y)
    predictions = model.predict(X)
    training_accuracy = computeAccuracy(Y, predictions)
    return training_accuracy

In [6]:
def computeTestError(model, X, Y, YScaler, k=10):
    """Cross-validated test error of ``model``.

    Assumes X, Y inputs are already scaled down.
    ``k`` is the number of partitions for k-fold cross-validation.

    Returns the first element of computeTestErrorAndAccuracy's result.

    NOTE(review): the ``YScaler`` argument is accepted here but is not
    forwarded to computeTestErrorAndAccuracy, so it has no effect on the
    value returned by this wrapper.
    """
    error_and_accuracy = computeTestErrorAndAccuracy(model, X, Y, k=k)
    return error_and_accuracy[0]

def computeTestAccuracy(model, X, Y, k=10):
    """Cross-validated test accuracy of ``model``.

    Assumes X, Y inputs are already scaled down.
    ``k`` is the number of partitions for k-fold cross-validation.

    Returns the second element of computeTestErrorAndAccuracy's result.
    """
    error_and_accuracy = computeTestErrorAndAccuracy(model, X, Y, k=k)
    return error_and_accuracy[1]

In [7]:
def computeTestErrorAndAccuracy(model, X, Y, k=10, YScaler=None):
    """k-fold cross-validated MSE and accuracy of ``model``.

    Input
    model: object exposing ``train(X, Y)`` and ``predict(X)``
    X: 2-D array of inputs, one sample per row
    Y: labels/targets, one entry (or row) per sample
    k: number of cross-validation folds
    YScaler: optional scaler forwarded to computeMSE. When omitted, a
        notebook-level variable named ``YScaler`` is used if one exists
        (this function previously read that global implicitly and raised
        NameError when it was missing); otherwise no scaler is applied.

    Returns
    (mean test error, mean accuracy) averaged over the k folds.
    """
    if YScaler is None:
        # Backward compatibility with callers that rely on a global YScaler.
        YScaler = globals().get('YScaler')

    # Fixed seed so the shuffle (and therefore the folds) is reproducible.
    np.random.seed(0)
    X, Y = shuffle(X, Y)
    N = len(Y)

    def fold_edge(index):
        # Row index where fold ``index`` starts: round(index / k * N).
        return int(np.round(index / k * N))

    test_errors = []
    accuracies = []
    # Performing k-fold cross-validation.
    for k_i in range(k):
        start, stop = fold_edge(k_i), fold_edge(k_i + 1)

        # Rows [start, stop) are held out; everything else is used to train.
        XTrain = np.vstack([X[:start, :], X[stop:, :]])
        # Join along axis 0 so both flat and column-shaped Y are handled.
        YTrain = np.concatenate([Y[:start], Y[stop:]], axis=0)
        XTest = X[start:stop, :]
        YTest = Y[start:stop]

        model.train(XTrain, YTrain)
        YHat = model.predict(XTest)
        test_errors.append(computeMSE(YTest, YHat, YScaler))
        accuracies.append(computeAccuracy(YTest, YHat))
    return np.mean(test_errors), np.mean(accuracies)

In [12]:
def computeRecoloredTestErrorAndAccuracy(model, X, Y, k=10, color_options={'gamma' : False,
                                                                           'rotate' : False},
                                         YScaler=None):
    """k-fold cross-validated MSE and accuracy on recolored image data.

    Input
    model: object exposing ``train(X, Y)`` and ``predict(X)``
    X: array of images, one per row; each is cast to uint8 and passed to
        convert_image_to_data
    Y: labels/targets, one entry (or row) per sample
    k: number of cross-validation folds
    color_options: options forwarded to recolor_images. The default dict is
        shared between calls; this function only reads it.
    YScaler: optional scaler forwarded to computeMSE. When omitted, a
        notebook-level variable named ``YScaler`` is used if one exists
        (this function previously read that global implicitly and raised
        NameError when it was missing); otherwise no scaler is applied.

    Returns
    (mean test error, mean accuracy) averaged over the k folds.
    """
    if YScaler is None:
        # Backward compatibility with callers that rely on a global YScaler.
        YScaler = globals().get('YScaler')

    # Fixed seed so the shuffle (and therefore the folds) is reproducible.
    np.random.seed(0)
    X, Y = shuffle(X, Y)
    X_original = X
    N = len(Y)

    def fold_edge(index):
        # Row index where fold ``index`` starts: round(index / k * N).
        return int(np.round(index / k * N))

    test_errors = []
    accuracies = []
    # Performing k-fold cross-validation.
    for k_i in range(k):
        # Processing images for model.
        # NOTE(review): conversion and recoloring are repeated for every fold
        # from the same shuffled images; kept inside the loop in case
        # recolor_images is stochastic - confirm before hoisting.
        img_data = [convert_image_to_data(img.astype('uint8')) for img in X_original]
        # Each converted image is laid out as 1 x 4 x 3, i.e. 12 features.
        img_data = np.array(img_data).reshape(X.shape[0], 1, 4, 3)
        X = np.asarray(recolor_images(img_data, color_options=color_options))
        X = X.reshape(X.shape[0], 12)

        start, stop = fold_edge(k_i), fold_edge(k_i + 1)

        # Rows [start, stop) are held out; everything else is used to train.
        XTrain = np.vstack([X[:start, :], X[stop:, :]])
        # Join along axis 0 so both flat and column-shaped Y are handled.
        YTrain = np.concatenate([Y[:start], Y[stop:]], axis=0)
        XTest = X[start:stop, :]
        YTest = Y[start:stop]

        model.train(XTrain, YTrain)
        YHat = model.predict(XTest)
        test_errors.append(computeMSE(YTest, YHat, YScaler))
        accuracies.append(computeAccuracy(YTest, YHat))
    return np.mean(test_errors), np.mean(accuracies)

In [9]:
def create_one_hot_label(Y,N_C):
    '''
    Input
    Y: iterable of class labels (int), each used as an index into the vector
    N_C: Number of Classes (length of every one hot vector)

    Returns
    List with one array per label; each array has length N_C, is zero
    everywhere and 1.0 at the label's index
    '''

    def encode(label):
        # Start from all zeros and switch on the slot for this class.
        vector = np.zeros(N_C)
        vector[label] = 1.0
        return vector

    return [encode(label) for label in Y]

In [10]:
def compute_covariance_matrix(X,Y):
    '''
    Input
    X: List of data points (each an array)
    Y: list of one hot class labels (each an array)

    Returns
    Cross-covariance matrix of X and Y: the average over all samples of the
    outer product of X[i] with Y[i]
    Note: Assumes Mean is subtracted (no centering is done here)
    '''
    n_samples = len(X)
    # Output size is taken from the largest axis of the first sample of each.
    n_rows = np.max(X[0].shape)
    n_cols = np.max(Y[0].shape)

    running_total = np.zeros([n_rows, n_cols])
    for idx in range(n_samples):
        column = np.array([X[idx]]).T   # sample as a column vector
        row = np.array([Y[idx]])        # label as a row vector
        running_total += column.dot(row)

    return running_total / float(n_samples)

In [11]:
def subtract_mean_from_data(X,Y):
    '''
    Input
    X: List of data points
    Y: list of one hot class labels

    Returns
    X and Y after each has been passed through its own
    StandardScaler(with_std = False), i.e. centered but not rescaled
    '''
    # Separate scalers: X and Y are centered independently of each other.
    centered_x = StandardScaler(with_std = False).fit(X).transform(X)
    centered_y = StandardScaler(with_std = False).fit(Y).transform(Y)
    return centered_x, centered_y