In [1]:
import hydra
from omegaconf import DictConfig, OmegaConf
import numpy as np
import pickle as pkl
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
from itertools import product
from datetime import datetime
from algorithms import BOMP
import json
import hashlib
from data_generation import *

import warnings
# Silence every warning for the rest of the session (no category or module
# filter is given). NOTE(review): this also hides deprecation and convergence
# warnings from the libraries used below — confirm that is intended.
warnings.filterwarnings("ignore")

In [2]:
def get_model_params(config):
    """Split the ``MODEL`` section of ``config`` into fixed and searched parameters.

    Entries whose value is a ``list`` are treated as a grid of candidate values
    and go into ``param_grid``; all other entries go into ``fixed_params``.
    ``Bag_lst`` and ``K_lst`` are the exception: they are lists but are always
    passed through as fixed parameters.

    ``config`` is only read, never modified, so the function can be called
    repeatedly on the same configuration.

    Args:
        config: Mapping with a ``"MODEL"`` entry that itself maps parameter
            names to values and contains at least ``"Bag_lst"`` and ``"K_lst"``.

    Returns:
        tuple: ``(fixed_params, param_grid)``, two new dicts. ``fixed_params``
        lists the non-list entries in their original order followed by
        ``"Bag_lst"`` and ``"K_lst"``; ``param_grid`` maps each remaining
        list-valued parameter to its list of candidates.

    Raises:
        KeyError: If ``"MODEL"``, ``"Bag_lst"`` or ``"K_lst"`` is missing.
    """
    # NOTE(review): the check below only recognises plain ``list`` values. If an
    # OmegaConf config is passed, it presumably needs converting first, e.g.
    # all_params = OmegaConf.to_container(config, resolve=True)["MODEL"]
    all_params = config["MODEL"]

    # Look these up first so a missing key fails before any work is done.
    Bag_lst = all_params["Bag_lst"]
    K_lst = all_params["K_lst"]
    always_fixed = ("Bag_lst", "K_lst")

    param_grid = {}
    fixed_params = {}
    for param, value in all_params.items():
        if param in always_fixed:
            # Handled separately below; skipping (instead of deleting the keys
            # from the caller's dict) keeps ``config`` intact.
            continue
        if isinstance(value, list):
            param_grid[param] = value
        else:
            fixed_params[param] = value

    fixed_params["Bag_lst"] = Bag_lst
    fixed_params["K_lst"] = K_lst
    return fixed_params, param_grid



In [3]:
# Experiment configuration, inlined as a plain dict.
# Only the "MODEL" and "TEST" sections are read in this cell; "filename" and
# "hydra" are carried along unchanged.
configs = {
    "MODEL": {
        "Bag_lst": [1, 50, 100],
        "K_lst": [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
        "agg_func": "weight",
        "atom_bag_percent": [0.5, 0.6, 0.7, 0.8, 0.9],
        "ignore_warning": True,
        "random_seed": 1,
        "replace_flag": False,
        "select_atom_percent": 0,
        "signal_bag_percent": [0.6, 0.8, 1],
    },
    "TEST": {
        "cv_num": 5,
        "m": 20,
        "model": "BOMP",
        "n": 600,
        "noise_levels": [0.12, 0.14],
        "p": 1000,
        "trial_num": 20,
    },
    "filename": "BOMP_600_1000_20_nr_0713.yaml",
    "hydra": {
        "hydra_logging": {"level": "CRITICAL"},
        "job_logging": {"level": "CRITICAL"},
        "run": {"dir": "memory/0713/"},
    },
}

# Scalar run settings.
model_name = configs["TEST"]["model"]
cv_num = configs["TEST"]["cv_num"]
trial_num = configs["TEST"]["trial_num"]

# Settings that may be given either as a single value or as a list of values:
# a bare value is wrapped in a one-element list so everything below can iterate.
n_tmp, p_tmp, m_tmp, noise_level_lst = (
    value if isinstance(value, list) else [value]
    for value in (configs["TEST"][key] for key in ("n", "p", "m", "noise_levels"))
)

# Every (n, p, m) combination to be run.
npm_lst = list(product(n_tmp, p_tmp, m_tmp))

# Separate the fixed model parameters from the ones to be grid-searched.
fixed_params, param_grid = get_model_params(configs)


In [4]:
# Estimator built from the fixed (non-searched) parameters; not used further
# in this cell.
model = BOMP(**fixed_params)

# Run a single trial on the first (n, p, m) combination and first noise level.
n, p, m = npm_lst[0]
noise_level = noise_level_lst[0]
trial_id = 1

# trial_id is passed to both the generator and the train/test split below.
Data_Geneartor = GaussianDataGenerator(p, n, m, noise_level, trial_id)
true_signal, dictionary, true_indices, true_coefficients, perturbed_signal = (
    Data_Geneartor.shuffle()
)

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    dictionary,
    perturbed_signal,
    test_size=0.2,
    random_state=trial_id,
)

# One-off fit with hand-picked bagging percentages (every other BOMP argument
# is left at its default); the value returned by fit() is the cell output.
temp = BOMP(atom_bag_percent=0.5, signal_bag_percent=0.6)
temp.fit(X_train, y_train)

[[ 1.          8.          0.03660858]
 [ 2.          8.          0.03364462]
 [ 3.         10.          0.03101546]
 [ 4.         10.          0.02355074]
 [ 5.          4.          0.0203709 ]
 [ 6.         10.          0.01823586]
 [ 7.         10.          0.01843236]
 [ 8.         10.          0.01677281]
 [ 9.          9.          0.01542977]
 [10.         10.          0.01454274]]


(array([ 3.81511933e-02,  1.01722459e-01, -3.00705959e-01, -2.57130683e-01,
        -4.31151317e-01, -6.76168481e-02,  5.90171082e-02, -8.35489350e-02,
        -4.02525978e-02, -2.72826389e-01,  1.00383045e-01, -2.38160455e-02,
        -5.76591471e-02,  7.73288011e-02,  8.95581979e-02,  1.79916256e-01,
        -1.98328507e-01, -2.56895552e-02, -1.59716271e-01, -4.52044746e-01,
        -6.86218994e-04, -1.95897634e-01, -1.13165087e-01,  1.48071296e-01,
        -2.54993490e-01, -3.15419133e-01, -2.31398154e-01,  1.89817758e-01,
        -2.46644257e-01, -1.28533603e-01, -1.09594686e-01, -1.32967121e-01,
         1.37834383e-02,  2.34380424e-03,  3.47138167e-01,  2.40401239e-01,
         3.25598053e-01, -1.30954191e-01, -1.56820863e-01, -1.31206749e-01,
        -1.53254947e-01, -1.55380209e-01,  2.59314590e-02, -1.14410030e-01,
        -1.46879773e-01,  3.61213519e-01, -8.66151012e-02,  1.66836982e-01,
        -2.20509239e-01,  5.48171310e-03, -1.43460921e-02,  2.26123754e-01,
        -8.6

In [5]:
# Disabled grid-search run, kept for reference only (nothing in this cell
# executes). If uncommented it repeats the data setup of the previous cell,
# then cross-validates `model` over `param_grid` with GridSearchCV (`cv_num`
# folds, negative-MSE scoring, n_jobs=-1) and negates the mean test scores to
# obtain positive CV errors.
# model = BOMP(**fixed_params)


# n = npm_lst[0][0]
# p = npm_lst[0][1]
# m = npm_lst[0][2]
# noise_level = noise_level_lst[0]
# trial_id = 1

# Data_Geneartor = GaussianDataGenerator(p, n, m, noise_level, trial_id)
# (
#     true_signal,
#     dictionary,
#     true_indices,
#     true_coefficients,
#     perturbed_signal,
# ) = Data_Geneartor.shuffle()
# X_train, X_test, y_train, y_test = train_test_split(
#     dictionary, perturbed_signal, test_size=0.2, random_state=trial_id
# )
# gs = GridSearchCV(
#     model,
#     param_grid,
#     cv=cv_num,
#     scoring="neg_mean_squared_error",
#     n_jobs=-1,
#     verbose=0,
# )
# gs.fit(X_train, y_train)
# cv_err_lst = -gs.cv_results_["mean_test_score"]