In [11]:
%load_ext autoreload
%autoreload 2

import pickle

import numpy as np
import pandas as pd
from plotly.offline import init_notebook_mode, iplot, plot
from plotly import plotly as pty
from IPython.display import display, HTML

from var_selection import variable_selection

init_notebook_mode(connected=False)
np.set_printoptions(precision=4)
pd.set_option('display.max_rows', 12)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


In [4]:
size = 1000
dims = 5
zero_coefs = [2, 3]

In [2]:
coefs = np.random.rand(1, dims)
coefs[:, zero_coefs] = 0

X = np.random.rand(size, dims)
Y = ((X @ coefs.T) + np.random.randn(size, 1)*10)

print("β coefficients:")
coefs

β coefficients:


array([[ 0.3116,  0.335 ,  0.    ,  0.    ,  0.3435]])

In [3]:
β_lstsq, *_ = np.linalg.lstsq(X, Y)
print("Least squares solution")
print(β_lstsq.flatten())
print("Least squares error")
print((Y - X@β_lstsq).T@(Y - X@β_lstsq))

Least squares solution
[ 1.0761 -1.2483 -1.8416  0.2841  2.5736]
Least squares error
[[ 98158.0653]]


In [13]:
with open('results.pickle', 'rb') as fp:
    results = pickle.load(fp)

β, var, chain, probs, models = results
coefs = np.array([[0.3116,  0.335 ,  0.    ,  0.    ,  0.3435]])

In [22]:
β_zero = np.zeros(dims)
var_zero = 1000
p = np.array([0.1, 0.9, 0.1, 0.9, 0])
τ = np.ones_like(β_zero)
υ = -10 * np.ones_like(β_zero)
λ = 10 * np.ones_like(β_zero)
ν = 2
iterations = int(1e6)

In [9]:
results = variable_selection(X, Y, β_zero,
                             var_zero, p,
                             τ, ν, υ, λ,
                             iterations,
                             verbose=10)

β, var, chain, probs, models = results

with open('results.pickle', 'wb') as fp:
    pickle.dump(results, fp)


invalid value encountered in log



0th iteration
Error: 100711.08755271348
β: [-1.0149  0.      2.1215  0.     -0.153 ]
σ²: 108.45570596748024, σ: 10.414206929357618

100000th iteration
Error: 98405.3888065936
β: [ 0.334   0.     -1.5534  0.      1.8689]
σ²: 98.65538852283265, σ: 9.932541896354259

200000th iteration
Error: 98398.27730306512
β: [ 1.2708  0.     -2.4701  0.      2.5318]
σ²: 100.61021479194787, σ: 10.030464335809578

300000th iteration
Error: 98422.58138207352
β: [ 1.5844  0.     -2.7366  0.      2.2791]
σ²: 96.62979912829674, σ: 9.830045733784596

400000th iteration
Error: 98353.25428815887
β: [ 1.109   0.     -2.5057  0.      2.4831]
σ²: 99.15067133464495, σ: 9.957443011870314

500000th iteration
Error: 98418.83681702941
β: [ 0.      0.     -2.0244  0.      3.0327]
σ²: 96.60251814215161, σ: 9.828658003112714

600000th iteration
Error: 98487.86564299719
β: [ 0.      0.     -1.1649  0.      1.8547]
σ²: 99.38185142411344, σ: 9.969044659550555

700000th iteration
Error: 98407.22887842137
β: [ 0.      0.    

In [5]:
df = pd.DataFrame(chain, columns=[f'β_{i}' for i in range(dims)] + ['σ²'])
df['σ'] = df['σ²']**0.5
df = df.drop('σ²', axis=1)
df

Unnamed: 0,β_0,β_1,β_2,β_3,β_4,σ
0,-1.014879,0.0,2.121513,0.0,-0.153014,10.414207
1,0.000000,0.0,1.116826,0.0,0.151632,9.913209
2,0.271047,0.0,0.196882,0.0,1.092227,10.008518
3,0.000000,0.0,-0.459915,0.0,1.406494,9.903380
4,0.080116,0.0,0.000000,0.0,0.981718,9.925296
5,0.266615,0.0,-0.874854,0.0,1.389610,9.974520
...,...,...,...,...,...,...
999994,0.000000,0.0,-1.036740,0.0,1.775951,10.027211
999995,0.468957,0.0,-1.844482,0.0,2.370101,9.778276
999996,0.000000,0.0,-1.211871,0.0,2.424447,9.790529


In [6]:
df['idx'] = pd.PeriodIndex(start='2017-07-08', periods=int(1e6), freq='s')
df.set_index('idx', inplace=True)
df

Unnamed: 0_level_0,β_0,β_1,β_2,β_3,β_4,σ
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-07-08 00:00:00,-1.014879,0.0,2.121513,0.0,-0.153014,10.414207
2017-07-08 00:00:01,0.000000,0.0,1.116826,0.0,0.151632,9.913209
2017-07-08 00:00:02,0.271047,0.0,0.196882,0.0,1.092227,10.008518
2017-07-08 00:00:03,0.000000,0.0,-0.459915,0.0,1.406494,9.903380
2017-07-08 00:00:04,0.080116,0.0,0.000000,0.0,0.981718,9.925296
2017-07-08 00:00:05,0.266615,0.0,-0.874854,0.0,1.389610,9.974520
...,...,...,...,...,...,...
2017-07-19 13:46:34,0.000000,0.0,-1.036740,0.0,1.775951,10.027211
2017-07-19 13:46:35,0.468957,0.0,-1.844482,0.0,2.370101,9.778276
2017-07-19 13:46:36,0.000000,0.0,-1.211871,0.0,2.424447,9.790529


In [38]:
df2 = df.resample('5000s').last()
df2

Unnamed: 0_level_0,β_0,β_1,β_2,β_3,β_4,σ
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-07-08 00:00:00,-0.210026,0.000000,-1.675627,0.0,2.990353,9.805119
2017-07-08 01:23:20,1.013704,0.000000,-1.961039,0.0,2.120469,9.992669
2017-07-08 02:46:40,0.237286,0.000000,-1.944100,0.0,2.604515,9.778588
2017-07-08 04:10:00,1.268851,0.000000,-2.784226,0.0,2.346751,9.566552
2017-07-08 05:33:20,0.000000,0.000000,-1.967399,0.0,3.256154,10.033804
2017-07-08 06:56:40,0.000000,0.000000,-2.342998,0.0,2.999054,10.050220
...,...,...,...,...,...,...
2017-07-19 05:26:40,0.776969,0.000000,-2.194785,0.0,2.222615,9.859438
2017-07-19 06:50:00,0.330720,0.000000,-2.221896,0.0,2.939968,9.957351
2017-07-19 08:13:20,0.725651,0.000000,-1.953099,0.0,2.416772,9.812302


In [39]:
frames = []
for i in range(1, len(df2)):
    data = []
    for j, (key, val) in enumerate(df2.iloc[:i].to_dict().items(), 1):
        datum = {
            'y': list(val.values()),
            'x': list(range(i)),
            'name': key,
            'xaxis': 'x',
            'marker': 'line'
        }
        if j > 1:
            datum['yaxis'] = f'y{j}'
        data.append(datum)
    frames.append({'data': data, 'name': i})

In [40]:
min_x = 1
max_x = len(frames)-1
figure = {
    'data': frames[-1]['data'],
    'layout': {
        'height': 900,
        'xaxis': {'range': [min_x, max_x], 'autorange': False},
        'yaxis': {'range': [-0.25, 3.5], 'domain': [0, 0.14]},
        'yaxis2': {'range': [-1, 1], 'domain': [0.152, 0.292]},
        'yaxis3': {'range': [-3.5, -0.5], 'domain': [0.304, 0.444]},
        'yaxis4': {'range': [-1, 1], 'domain': [0.456, 0.596]},
        'yaxis5': {'range': [1.5, 3.5], 'domain': [0.608, 0.848]},
        'yaxis6': {'range': [9.4, 10.6], 'domain': [0.858, 1]},
        'title': f"Synthetic dataset - 1M iterations - "
                 f"No burn-in - 5k thinning - "
                 f"p: {p} - True β: {coefs} - True σ: 10",
        'updatemenus': [{
            'type': 'buttons',
            'buttons': [
                {
                    'args': [
                        None, {
                            'frame': {'duration': 500, 'redraw': False},
                            'fromcurrent': True,
                            'transition': {
                                'duration': 300,
                                'easing': 'quadratic-in-out'}
                        }
                    ],
                    'label': 'Play',
                    'method': 'animate'
                },
                {
                    'args': [
                        [None], {
                            'frame': {'duration': 0, 'redraw': False},
                            'mode': 'immediate',
                            'transition': {'duration': 0}
                        }
                    ],
                    'label': 'Pause',
                    'method': 'animate'
                }
            ]
        }],
        'sliders': [{
            'active': 0,
            'yanchor': 'top',
            'xanchor': 'left',
            'currentvalue': {
                'font': {'size': 20},
                'visible': True,
                'xanchor': 'right'
            },
            'transition': {'duration': 300, 'easing': 'cubic-in-out'},
            'pad': {'b': 10, 't': 50},
            'len': 1,
            'x': 0,
            'y': 0,
            'steps': [
                {
                    'args': [
                        [i],
                        {
                            'frame': {'duration': 300, 'redraw': False},
                            'mode': 'immediate',
                            'transition': {'duration': 300}
                        }
                    ],
                    'label': i,
                    'method': 'animate'
                }
                for i in range(min_x, max_x)
            ]
        }]
    },
    'frames': frames[min_x:max_x]
}

In [24]:
iplot(figure, show_link=False)

In [41]:
plot(figure, show_link=False, filename='variable_selection_synth_data.html')

'file:///home/mtambos/google_drive/Projects/mcm/project/variable_selection_synth_data.html'