In [2]:
import argparse
import yaml
import numpy as np
import pickle as pkl
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
from itertools import product
from datetime import datetime
import json
import hashlib
import os
import matplotlib.pyplot as plt

from algorithms import BMP
from data_generation import GaussianDataGenerator

In [3]:
# Settings for a single synthetic trial. All five values are forwarded
# positionally to GaussianDataGenerator below.
# NOTE(review): what p / n / m stand for is defined by GaussianDataGenerator,
# which is not visible here — confirm against data_generation.
p, n, m = 300, 500, 20
trial_id = 1
noise_level = 0.1

Data_Geneartor = GaussianDataGenerator(p, n, m, noise_level, trial_id)

# shuffle() yields five values, unpacked here in order.
true_signal, dictionary, true_indices, true_coefficients, perturbed_signal = (
    Data_Geneartor.shuffle()
)

# 80/20 train/test split of (dictionary, perturbed_signal); trial_id doubles
# as the random seed of the split.
X_train, X_test, y_train, y_test = train_test_split(
    dictionary,
    perturbed_signal,
    test_size=0.2,
    random_state=trial_id,
)

In [5]:
# Estimator that is passed to GridSearchCV further down in this cell, and the
# grid of K values it is evaluated on.
model = BMP(N_bag=10)
param_grid = dict(K=[10, 20])

def myscores(estimator, X, y):
    """Multi-metric scorer: expose every fitted coefficient plus the model score.

    Parameters
    ----------
    estimator : fitted estimator
        Must provide ``score(X, y)`` and an iterable ``coefficients`` attribute.
    X, y : validation data
        Passed through unchanged to ``estimator.score``.

    Returns
    -------
    dict
        ``'beta_<i>'`` -> i-th entry of ``estimator.coefficients`` (in
        iteration order), followed by ``'mse'`` -> ``estimator.score(X, y)``.

    NOTE(review): the result of ``estimator.score`` is stored under the key
    ``'mse'``; whether it really is a mean squared error (lower is better)
    depends on ``BMP.score``, which is not visible here — confirm.
    """
    # Score first, then read the coefficients (same order as before).
    score_value = estimator.score(X, y)

    metrics = {}
    for idx, beta in enumerate(estimator.coefficients):
        metrics[f'beta_{idx}'] = beta
    metrics['mse'] = score_value
    return metrics

# 5-fold grid search over param_grid, scored with the multi-metric callable
# myscores; n_jobs=-1 asks for all available cores.
# NOTE(review): with refit='mse', GridSearchCV refits on the candidate whose
# mean 'mse' value is HIGHEST. If BMP.score returns a plain mean squared error
# (lower is better) this selects the worst K — confirm what BMP.score returns.
gs = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring=myscores,
    refit='mse',
    cv=5,
    n_jobs=-1,
)
gs.fit(X_train, y_train)

# Show the raw per-split / per-metric results table.
gs.cv_results_


{'mean_fit_time': array([0.0120708, 0.0134933]),
 'std_fit_time': array([0.00384695, 0.00761734]),
 'mean_score_time': array([0.0003984 , 0.00041375]),
 'std_score_time': array([0.00014142, 0.00023064]),
 'param_K': masked_array(data=[10, 20],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'params': [{'K': 10}, {'K': 20}],
 'split0_test_beta_0': array([0.        , 0.06152737]),
 'split1_test_beta_0': array([0., 0.]),
 'split2_test_beta_0': array([0.        , 0.16682392]),
 'split3_test_beta_0': array([0.        , 0.04183732]),
 'split4_test_beta_0': array([0.        , 0.02341987]),
 'mean_test_beta_0': array([0.       , 0.0587217]),
 'std_test_beta_0': array([0.        , 0.05774686]),
 'rank_test_beta_0': array([2, 1], dtype=int32),
 'split0_test_beta_1': array([0., 0.]),
 'split1_test_beta_1': array([0., 0.]),
 'split2_test_beta_1': array([ 0.        , -0.03768467]),
 'split3_test_beta_1': array([0., 0.]),
 'split4_test_beta_1': array([0., 0.])

In [19]:
# Scratch check: fill one slab of a (len(K_lst), N_bag, p) array on a tiny
# example and display the result.
# NOTE(review): this rebinds the notebook-level p (set to 300 in the
# data-generation cell) to 5 — cells run after this one see p == 5.
K_lst = np.arange(10, 20, dtype=int)
N_bag, p = 2, 5

# One (N_bag, p) slab of zeros per entry of K_lst.
K_coef_lst_mat = np.zeros((K_lst.size, N_bag, p))

# N_bag all-ones vectors of length p, stacked row-wise into shape (N_bag, p).
coef_lst = [np.ones(p) for _ in range(N_bag)]
coef_lst_mat = np.stack(coef_lst, axis=0)

# Write the stacked block into the first slab; every other slab stays zero.
K_coef_lst_mat[0] = coef_lst_mat
K_coef_lst_mat

array([[[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]])

In [20]:
# Rebind K_coef_lst_mat to a fresh all-zero array with one (N_bag, p) slab per
# entry of K_lst, then display it. K_lst, N_bag and p are not assigned in this
# cell, so they must already exist in the kernel before it runs.
K_coef_lst_mat = np.zeros(shape=(len(K_lst), N_bag, p))
K_coef_lst_mat

array([[[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]])