In [1]:
import var_selection
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
pd.set_option('display.float_format', lambda x: '%.3f' % x)
np.set_printoptions(precision=4)

In [2]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)

dataset = pd.read_csv('./data/hald_data.csv', delimiter='\t', skiprows=1, names=['Heat', 'Tricalcium_aluminate','Tricalcium_silicate','Tetracalcium_alumino_ferrite','Dicalcium_silicate'])
X = dataset[['Tricalcium_aluminate','Tricalcium_silicate','Tetracalcium_alumino_ferrite','Dicalcium_silicate']]
y = dataset[['Heat']]
corrMat = X.corr()
print('Covariate sample correlation matrix:\n'+str(corrMat))

Covariate sample correlation matrix:
                              Tricalcium_aluminate  Tricalcium_silicate  \
Tricalcium_aluminate                         1.000                0.229   
Tricalcium_silicate                          0.229                1.000   
Tetracalcium_alumino_ferrite                -0.824               -0.139   
Dicalcium_silicate                          -0.245               -0.645   

                              Tetracalcium_alumino_ferrite  Dicalcium_silicate  
Tricalcium_aluminate                                -0.824              -0.245  
Tricalcium_silicate                                 -0.139              -0.645  
Tetracalcium_alumino_ferrite                         1.000               0.297  
Dicalcium_silicate                                   0.297               1.000  


In [3]:
[Evals, Evecs] = np.linalg.eig(corrMat)
print('Eigenvalues of Covariate sample correlation matrix: \n'+str(Evals))

Eigenvalues of Covariate sample correlation matrix: 
[ 2.2009  1.2745  0.1508  0.3737]


In [4]:
if type(X) == pd.DataFrame:
    X = np.concatenate([X.as_matrix(), np.ones([X.shape[0],1])], axis = 1)
    y = y.as_matrix()

In [5]:
pd.set_option('display.float_format', lambda x: '%.5f' % x)

OLS_β = np.dot( np.linalg.inv(np.dot(X.T, X)), np.dot(X.T, y))
OLS_β_std = np.linalg.inv(np.dot(X.T, X))

In [6]:
delta_Heat = 20.
delta_Tricalcium_aluminate = 10.
delta_Tricalcium_silicate = 22.5
delta_Tetracalcium_alumino_ferrite = 8.5
delta_Dicalcium_silicate = 27.

In [7]:
τ_star = np.atleast_2d(delta_Heat / np.array([delta_Tricalcium_aluminate,delta_Tricalcium_silicate,delta_Tetracalcium_alumino_ferrite,delta_Dicalcium_silicate])).T

stats = np.concatenate([OLS_β[:-1], np.sqrt(np.atleast_2d(np.diag(OLS_β_std)[:-1]).T), τ_star], axis = 1)

stats = pd.DataFrame(stats, columns = ['OLS coefficient','OLS standard error', 'prior std τ*'], index = ['Tricalcium_aluminate','Tricalcium_silicate','Tetracalcium_alumino_ferrite','Dicalcium_silicate'])
print(stats)

                              OLS coefficient  OLS standard error  \
Tricalcium_aluminate                  1.69345             0.08980   
Tricalcium_silicate                   0.65953             0.02523   
Tetracalcium_alumino_ferrite          0.24612             0.08357   
Dicalcium_silicate                    0.00414             0.02604   

                              prior std τ*  
Tricalcium_aluminate               2.00000  
Tricalcium_silicate                0.88889  
Tetracalcium_alumino_ferrite       2.35294  
Dicalcium_silicate                 0.74074  


In [8]:
# Formulas found here: https://pdfs.semanticscholar.org/3ace/886849dd48eb911b0491d70ef3ec197f9234.pdf
SS_reg = (OLS_β.T.dot(X.T.dot(y)) - (1./X.shape[0])*(np.ones_like(y).T.dot(y))**2)[0,0]
SS_total = (y.T.dot(y) - (1./X.shape[0])*(np.ones_like(y).T.dot(y))**2)[0,0]
print('multiple correlation coef: {:.3f}'.format(SS_reg/SS_total))

multiple correlation coef: 0.982


In [9]:
β = np.array([0,0,0,0,0], dtype = float) #np.zeros([5,1])
ϵ_squared = 2.5**2
p = np.array([0.5,0.5,0.5,0.5,0]) #np.concatenate([0.5 * np.ones([4,1]), np.zeros([1,1])])
τ = np.concatenate([τ_star, 9 * np.ones([1,1])]) #np.ones_like(β)
λ = -1 * np.ones([5,1]) * np.infty
υ = np.ones([5,1]) * np.infty
ν = 0.01
iterations = 1000000

In [None]:
# TN (lambda, υ)
results = var_selection.variable_selection(X, y, β, ϵ_squared, p, τ, ν, υ, λ, iterations, verbose=10)
pickle.dump(file = open("./results/hald/1000000/p_0.5_tau_1.pickle", 'wb'), obj=results)

0th iteration
Error: 446713.02609893045
β: [  0.      -1.3344   0.       0.     -22.7623]
σ²: 5.303818446833617e+72, σ: 2.3030020509833717e+36



  bf = np.exp(β_bar**2/(2*var_star) - β_old**2/(2*τ**2))


100000th iteration
Error: 58.858944422186426
β: [  1.4662   0.6751   0.       0.      52.1648]
σ²: 4.391246051292085, σ: 2.095530016795771

200000th iteration
Error: 72.11512292259124
β: [  1.5827   0.5996   0.       0.      55.0139]
σ²: 3.527151855435038, σ: 1.8780713126596227

300000th iteration
Error: 82.87061186661005
β: [  1.6967   0.5904   0.4263   0.      48.8458]
σ²: 5.65466016881486, σ: 2.3779529366273966

400000th iteration
Error: 70.07041528508307
β: [  1.3052   0.6918   0.       0.      52.6516]
σ²: 6.156267732113758, σ: 2.4811827284812695

500000th iteration
Error: 71.67623368558438
β: [  1.4343   0.617    0.       0.      55.722 ]
σ²: 3.2627316673627207, σ: 1.8063033154381134

600000th iteration
Error: 81.757953726633
β: [  1.6013   0.606    0.       0.      55.2156]
σ²: 6.456894485142944, σ: 2.541042007748582

700000th iteration
Error: 63.42497393668697
β: [  1.3556   0.6681   0.       0.      52.9438]
σ²: 5.963100224991428, σ: 2.4419459914157455

800000th iteration
Erro