In [None]:
import var_selection
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Display configuration: pandas renders floats with three decimals and NumPy
# prints arrays with four digits of precision.
def _format_3dp(x):
    """Render a float with three decimal places for pandas display."""
    return '%.3f' % x

pd.set_option('display.float_format', _format_3dp)
np.set_printoptions(precision=4)

In [None]:
# Show floats with three decimals in pandas output.
pd.set_option('display.float_format', lambda value: '%.3f' % value)

# Read the space-delimited data file and keep only the response column
# ('happy') and the four covariates used in the regression.
response_cols = ['happy']
covariate_cols = ['money', 'sex', 'love', 'work']
dataset = pd.read_csv('./data/happyness_data.csv', delimiter=' ')[response_cols + covariate_cols]

# Design matrix (covariates only) and response, both still DataFrames here.
X = dataset[covariate_cols]
y = dataset[response_cols]

# Pairwise sample correlations between the covariates.
corrMat = X.corr()
print('Covariate sample correlation matrix:\n', corrMat, sep='')

#### Values are identical to those in the paper (page 16).

In [None]:
# Eigen-decomposition of the covariate correlation matrix: Evals holds the
# eigenvalues and Evecs the matching eigenvectors as returned by np.linalg.eig.
# NOTE(review): corrMat comes from DataFrame.corr() and is therefore symmetric;
# np.linalg.eigh would also apply but returns eigenvalues in ascending order,
# so it is not swapped in here.
Evals, Evecs = np.linalg.eig(corrMat)
print('Eigenvalues of Covariate sample correlation matrix: \n', Evals, sep='')

#### Values are identical to those in the paper (page 16).

In [None]:
# Convert the pandas objects to NumPy arrays and append a column of ones to X
# so the regression includes an intercept (stored as the LAST column).
# The isinstance guard keeps the cell idempotent: once X is an ndarray, a
# re-run does not append a second column of ones.
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the replacement
# named in its deprecation warning and works on old and new pandas alike.
if isinstance(X, pd.DataFrame):
    X = np.concatenate([X.values, np.ones([X.shape[0], 1])], axis=1)
    y = y.values

In [None]:
pd.set_option('display.float_format', lambda x: '%.5f' % x)

# Ordinary least squares solution: β̂ = (XᵀX)⁻¹ Xᵀ y.
# The inverse is computed once and reused below.
XtX_inv = np.linalg.inv(np.dot(X.T, X))
OLS_β = np.dot(XtX_inv, np.dot(X.T, y))

# Unscaled coefficient covariance (XᵀX)⁻¹. Kept under its original name and
# with its original value so that any later cell relying on it is unaffected.
OLS_β_std = XtX_inv

# Residual variance estimate σ̂² = RSS / (n − k), where k counts every column
# of X. The standard error of each coefficient is sqrt(σ̂² · diag((XᵀX)⁻¹));
# without the σ̂² factor the numbers are not standard errors.
residuals = y - X.dot(OLS_β)
n_obs, n_coef = X.shape
σ_squared_hat = float(np.sum(residuals ** 2)) / (n_obs - n_coef)
OLS_β_se = np.sqrt(σ_squared_hat * np.diag(OLS_β_std))

# priors for τ_star: ratio of the happiness delta to each covariate's delta,
# in the order money, sex, love, work.
delta_happiness = 4.
delta_money = 50.
delta_sex = 0.5
delta_love = 1.
delta_work = 2.

τ_star = np.atleast_2d(delta_happiness / np.array([delta_money,delta_sex,delta_love,delta_work])).T

# Summary table; [:-1] drops the last entry, which belongs to the column of
# ones appended to X as its final column.
stats = np.concatenate([OLS_β[:-1], np.atleast_2d(OLS_β_se[:-1]).T, τ_star], axis = 1)

stats = pd.DataFrame(stats, columns = ['OLS coefficient','OLS standard error', 'prior std τ*'], index = ['money','sex','love','work'])
print(stats)

#### These values are very similar to those in the paper (page 16).  

In [None]:
# Regression and total sums of squares in matrix form, following
# https://pdfs.semanticscholar.org/3ace/886849dd48eb911b0491d70ef3ec197f9234.pdf
n_samples = X.shape[0]

# (1/n) * (1ᵀy)², the mean-correction term shared by both sums of squares.
y_sum = np.ones_like(y).T.dot(y)
mean_correction = (1./n_samples) * y_sum**2

SS_reg = (OLS_β.T.dot(X.T.dot(y)) - mean_correction)[0,0]
SS_total = (y.T.dot(y) - mean_correction)[0,0]

# NOTE(review): SS_reg/SS_total is the coefficient of determination R², i.e.
# the SQUARE of the multiple correlation coefficient — confirm whether the
# label below (or a square root) is what is intended.
ratio = SS_reg/SS_total
print('multiple correlation coef: {:.3f}'.format(ratio))

#### According to the paper the prior mean of the multiple correlation coefficient is 0.75. (?)

In [None]:
# The string below is an inert block holding an earlier two-coefficient
# configuration; it is not assigned or used.
"""
β_zero = np.array([0, 0])
var_zero = 10000
p = np.array([0.5, 0.5])
τ = np.ones_like(β_zero)
υ = -10 * np.ones_like(β_zero)
λ = 10 * np.ones_like(β_zero)
ν = 2
iterations = 1000
"""
# Settings handed to var_selection.variable_selection. Five entries each:
# four covariates followed by the column of ones appended to X.

# Starting coefficient vector: all zeros (integer dtype, shape (5,)).
β = np.zeros(5, dtype=int)

ϵ_squared = 2.5**2

# 0.5 for each of the four covariates, 0 for the last entry.
p = np.array([0.5] * 4 + [0])

# τ_star for the four covariates, with 9 appended for the last entry.
τ = np.concatenate([τ_star, np.full((1, 1), 9.0)])

# Every entry set to 10e3 (note: 10e3 == 1e4 == 10000.0).
υ = np.full((5, 1), 10e3)

# 0 for the four covariates, -1000 for the last entry.
λ = np.concatenate([np.zeros((4, 1)), np.full((1, 1), -1000.0)])

ν = 0.01
iterations = 1000

In [None]:
# TN (lambda, υ)
# The string block below lists type notes for the arguments of the call that
# follows, in positional order.
# NOTE(review): its 7th and 8th entries are written "v" (Latin v, scalar) and
# "ν" (nu, iterable), whereas the call passes ν (nu, assigned the scalar 0.01
# earlier in this notebook) 7th and υ (upsilon, a 5x1 array) 8th. The labels
# presumably should read "ν: scalar" / "υ: iterable" — confirm against the
# signature of var_selection.variable_selection.
"""
X: numpy.ndarray
y: numpy.ndarray
β: numpy.ndarray
ϵ_squared : scalar
p: iterable
τ: numpy.ndarray
v: scalar
ν: iterable
λ: iterable
iterations: scalar
"""
# Call the project-local var_selection.variable_selection with the data and
# the hyperparameters defined earlier; whatever it returns is kept in `result`.
result = var_selection.variable_selection(X, y, β, ϵ_squared, p, τ, ν, υ, λ, iterations)

In [None]:
# Display the first element of the value returned by variable_selection
# (as the cell's last expression it is rendered as the cell output).
result[0]