In [14]:
__author__ = 'nileshtrip'
import numpy as np
import math
import itertools
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from numpy import transpose as trans
from collections import OrderedDict
import warnings
import pickle
import time
import os

In [16]:
def is_pos_def(A):

    """Validates that A is symmetric and admits a Cholesky factorization.

    Returns True when A equals its transpose exactly (np.array_equal) and
    np.linalg.cholesky(A) succeeds. The function never returns False:
    a failed check is reported by raising Exception instead.

    Raises
    ------
    Exception("Design Matrix Not Symmetric") if A differs from A.T.
    Exception("Design Matrix not PSD") if the Cholesky factorization fails.
    """

    # Guard clause: symmetry is tested first, with exact element-wise equality.
    if not np.array_equal(A, A.T):
        raise Exception("Design Matrix Not Symmetric")

    # Cholesky raises LinAlgError when the factorization does not exist.
    try:
        np.linalg.cholesky(A)
    except np.linalg.LinAlgError:
        raise Exception("Design Matrix not PSD")

    return True

In [20]:
def cond(X):

    """Returns the ratio of the largest to the smallest eigenvalue of X.T @ X / n.

    X must be 2-D (its shape is unpacked into exactly two values); n is the
    number of rows. The eigenvalues come from np.linalg.eigh applied to the
    resulting p x p matrix.
    """

    n_rows, _ = X.shape  # unpacking enforces a 2-D input
    sample_cov = (X.T @ X) / float(n_rows)

    # Only the eigenvalues are needed; the eigenvectors are discarded.
    eigenvalues = np.linalg.eigh(sample_cov)[0]
    largest = max(eigenvalues)
    smallest = min(eigenvalues)

    return largest / smallest

In [22]:
#Construct a rotation matrix with first row equal to the renormalized x_test vector. The remaining orthonormal rows span the
#orthogonal complement of x_test
def rotate_matrix(x_test):

    """Builds a square matrix whose first row is x_test rescaled to unit norm and
    whose remaining rows span the orthogonal complement of x_test.

    The complement rows are obtained by diagonalizing the projector
    I - u u^T (u = x_test / ||x_test||) and dropping the first eigenpair
    returned by np.linalg.eigh.
    """

    dim = x_test.shape[0]

    # Unit vector along x_test and the projector onto its orthogonal complement.
    unit = np.divide(x_test, np.linalg.norm(x_test))
    projector = np.eye(dim) - np.outer(unit, unit)

    # eigh returns eigenvalues in ascending order, so index 0 is the direction
    # that the projector annihilates (x_test itself); keep everything after it.
    spectrum, basis = np.linalg.eigh(projector)
    kept_values = spectrum[1:]
    kept_vectors = basis[:, 1:]

    # Scale each retained eigenvector by the square root of its eigenvalue and
    # lay the eigenvectors out as rows.
    complement_rows = np.sqrt(np.diag(kept_values)) @ kept_vectors.T

    # First row: unit vector; remaining rows: complement basis.
    return np.vstack((unit, complement_rows))

In [24]:
def gen_train_data(n, p, s, train_dist, x_scale, beta_scale, sigma):

    """Draws a training design, responses and coefficient vector, then standardizes the design.

    Arguments:
      n          -- number of datapoints
      p          -- dimension
      s          -- sparsity of the coefficient vector (entries from index s onward are zeroed)
      train_dist -- pair (name, params); only name == "normal" is supported and
                    params is read but not used
      x_scale    -- scales the raw design X
      beta_scale -- scales the coefficient vector
      sigma      -- scales the additive noise

    Returns (X_train, y_train, coef, X_scaler, kappa) where X_train is the
    design after StandardScaler.fit_transform, y_train was built from the
    design *before* standardization, X_scaler is the fitted scaler and kappa
    is cond(X_train).

    Raises Exception("Training Data Not Generated Correctly") for any other
    distribution name.
    """

    dist_name, _dist_params = train_dist[0], train_dist[1]

    if dist_name != "normal":
        raise Exception("Training Data Not Generated Correctly")

    # The order of the three random draws below (design, coefficients, noise)
    # determines the values obtained from the global numpy random state.
    X_raw = np.random.normal(size=(n, p)) * x_scale

    coef = beta_scale * np.random.normal(size=p)
    coef[s:] = 0  # keep only the first s coefficients non-zero

    y_train = X_raw @ coef
    y_train += sigma * np.random.normal(size=n)  # additive noise

    # Fit the centering/scaling transform on the raw design and apply it.
    X_scaler = StandardScaler()
    X_standardized = X_scaler.fit_transform(X_raw)

    return X_standardized, y_train, coef, X_scaler, cond(X_standardized)

In [14]:
def gen_test_data(n, p, s, coef, X_scaler, test_dist, x_scale, sigma):

    """Generates test data X_test, y_test for a given coefficient vector.

    Arguments:
      n         -- number of datapoints
      p         -- dimension
      s         -- number of leading columns that are non-zero in the
                   "normal+support_var" design (unused by the other designs)
      coef      -- coefficient vector; used to build y_test and, for the
                   rank-one and shifted designs, X_test itself
      X_scaler  -- object exposing transform(X); applied to X_test after
                   y_test has been computed (presumably the scaler returned
                   by gen_train_data -- confirm against the caller)
      test_dist -- pair (name, params) where name is one of
                     "normal"                  : X = N(0, 1) * x_scale
                     "normal+support_var"      : first s columns are
                                                 N(0, 1) * x_scale * params["support_scale"],
                                                 remaining p - s columns are zero
                     "normal+support_rank_one" : row i equals eps_i * coef with
                                                 eps_i = N(0, 1) * x_scale * params["support_scale"]
                     "normal+support_shift"    : X = N(0, 1) * x_scale
                                                 + params["scale"] * coef * x_scale
      x_scale   -- scales the design X
      sigma     -- scales the additive noise (0 gives noiseless responses)

    Returns (X_test, y_test): the design after X_scaler.transform and the
    responses computed from the design *before* that transform.

    Raises Exception("Test Dist Set Incorrectly") for an unknown distribution name.
    """

    test_dist, dist_params = test_dist[0], test_dist[1]

    # Generate Test Data
    if test_dist == "normal":
        X_test = np.random.normal(size=(n, p)) * x_scale
    elif test_dist == "normal+support_var":
        support_scale = dist_params["support_scale"]
        X_support = np.random.normal(size=(n, s)) * x_scale * support_scale
        X_test = np.hstack((X_support, np.zeros(shape=(n, p - s))))
    elif test_dist == "normal+support_rank_one":
        support_scale = dist_params["support_scale"]
        # One scalar draw per row. A single size-n draw consumes the global
        # legacy random stream exactly like n successive size-1 draws, so
        # seeded results are unchanged relative to a per-row loop.
        eps = np.random.normal(size=n) * x_scale * support_scale
        # Broadcast each row's scalar across all p columns, then scale the
        # columns by coef so that every row is a multiple of coef.
        X = np.repeat(eps[:, np.newaxis], p, axis=1)
        X_test = np.multiply(X, coef)
    elif test_dist == "normal+support_shift":
        X_test = np.random.normal(size=(n, p)) * x_scale
        mean_scale = dist_params["scale"]
        mean_shift = mean_scale * coef * x_scale
        X_test += mean_shift
    else:
        raise Exception("Test Dist Set Incorrectly")

    # Construct y-values from the unscaled design.
    y_test = X_test @ coef
    y_test += sigma * np.random.normal(size=n)  # Add noise if desired (sigma=0 disables it)

    X_test = X_scaler.transform(X_test)  # Scale/transform with the supplied scaler

    return X_test, y_test