In [1]:
import hydra
from omegaconf import DictConfig, OmegaConf
import numpy as np
import pickle as pkl
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
from itertools import product
from datetime import datetime
from algorithms import BOMP,OMP_Augmented
import json
import hashlib
from data_generation import *

import warnings
warnings.filterwarnings("ignore")

In [2]:
def get_model_params(config):
    """Split the ``MODEL`` section of a config into fixed and grid parameters.

    Every entry of ``config["MODEL"]`` whose value is a ``list`` is treated as
    a grid of candidate values and is returned in ``param_grid``; every other
    entry is returned in ``fixed_params``. ``Bag_lst`` and ``K_lst`` are the
    exception: they are always returned in ``fixed_params`` (appended last, in
    that order), regardless of their type.

    The input mapping is only read, never modified, so the function can be
    called any number of times on the same config.

    Parameters
    ----------
    config : mapping
        Must provide a ``"MODEL"`` mapping that contains the keys
        ``"Bag_lst"`` and ``"K_lst"``.

    Returns
    -------
    fixed_params : dict
        Non-list parameters plus ``Bag_lst`` and ``K_lst``.
    param_grid : dict
        List-valued parameters, keyed by parameter name.

    Raises
    ------
    KeyError
        If ``"MODEL"``, ``"Bag_lst"`` or ``"K_lst"`` is missing.
    """
    # all_params = OmegaConf.to_container(config, resolve=True)["MODEL"]
    all_params = config["MODEL"]
    always_fixed = ("Bag_lst", "K_lst")

    # Look the two special keys up front so a missing key still raises
    # KeyError, but do NOT delete them: deleting mutated the caller's config
    # and made a second call on the same config fail.
    pinned = {key: all_params[key] for key in always_fixed}

    param_grid = {}
    fixed_params = {}
    for param, value in all_params.items():
        if param in always_fixed:
            continue
        if isinstance(value, list):
            param_grid[param] = value
        else:
            fixed_params[param] = value

    # Appended after the loop so the key order matches the previous behaviour.
    fixed_params.update(pinned)
    return fixed_params, param_grid



In [3]:
# Experiment configuration, written inline as a plain dict.
configs = {
    # Estimator parameters. get_model_params() treats list values as grids to
    # search, except Bag_lst and K_lst, which it always returns as fixed.
    "MODEL": {
        "Bag_lst": [1, 50, 100],
        "K_lst": [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
        "agg_func": "weight",
        "atom_bag_percent": [0.5, 0.6, 0.7, 0.8, 0.9],
        "ignore_warning": True,
        "random_seed": 1,
        "replace_flag": False,
        "select_atom_percent": 0,
        "signal_bag_percent": [0.6, 0.8, 1],
    },
    # Experiment settings: problem sizes, noise levels, CV folds, trials.
    "TEST": {
        "cv_num": 5,
        "m": 20,
        "model": "BOMP",
        "n": 600,
        "noise_levels": [0.12, 0.14],
        "p": 1000,
        "trial_num": 20,
    },
    "filename": "BOMP_600_1000_20_nr_0713.yaml",
    # Hydra settings; none of the visible cells read this section.
    "hydra": {
        "hydra_logging": {"level": "CRITICAL"},
        "job_logging": {"level": "CRITICAL"},
        "run": {"dir": "memory/0713/"},
    },
}

# Scalar experiment settings, read straight from the TEST section.
model_name = configs["TEST"]["model"]
cv_num = configs["TEST"]["cv_num"]
trial_num = configs["TEST"]["trial_num"]

# Problem sizes may be given as a single value or as a list of values;
# a bare value is wrapped in a one-element list so product() can iterate it.
n_tmp = configs["TEST"]["n"]
n_tmp = n_tmp if isinstance(n_tmp, list) else [n_tmp]

p_tmp = configs["TEST"]["p"]
p_tmp = p_tmp if isinstance(p_tmp, list) else [p_tmp]

m_tmp = configs["TEST"]["m"]
m_tmp = m_tmp if isinstance(m_tmp, list) else [m_tmp]

# Every (n, p, m) combination, as a list of 3-tuples.
npm_lst = list(product(n_tmp, p_tmp, m_tmp))

# Noise levels get the same scalar-to-list treatment.
noise_level_lst = configs["TEST"]["noise_levels"]
noise_level_lst = (
    noise_level_lst if isinstance(noise_level_lst, list) else [noise_level_lst]
)

# Split the MODEL section into fixed and grid-searched parameters.
fixed_params, param_grid = get_model_params(configs)


In [5]:
# Estimator built from the fixed (non-grid) MODEL parameters.
model = BOMP(**fixed_params)

# Use the first (n, p, m) combination and the first noise level.
n, p, m = npm_lst[0]
noise_level = noise_level_lst[0]
trial_id = 1

# Note the argument order: p is passed before n.
Data_Geneartor = GaussianDataGenerator(p, n, m, noise_level, trial_id)
true_signal, dictionary, true_indices, true_coefficients, perturbed_signal = (
    Data_Geneartor.shuffle()
)

# Hold out 20% of the samples; trial_id doubles as the split seed.
X_train, X_test, y_train, y_test = train_test_split(
    dictionary,
    perturbed_signal,
    test_size=0.2,
    random_state=trial_id,
)

# Fit a second BOMP with hand-picked bagging fractions. The fit call is the
# last expression of the cell, so its return value is displayed as output.
temp = BOMP(atom_bag_percent=0.5, signal_bag_percent=0.6)
temp.fit(X_train, y_train)

[[ 1.          2.          0.03570581]
 [ 2.          2.          0.03391079]
 [ 3.         10.          0.03214786]
 [ 4.         10.          0.02321187]
 [ 5.         10.          0.02006312]
 [ 6.          9.          0.01870923]
 [ 7.         10.          0.01834008]
 [ 8.         10.          0.01671824]
 [ 9.         10.          0.01620185]
 [10.         10.          0.01539618]]


(array([ 0.0176708 ,  0.0897417 , -0.19900178, -0.20356613, -0.39247884,
        -0.04534136,  0.02329362, -0.00807141,  0.04234115, -0.22812841,
         0.06144568,  0.06377116, -0.03995793, -0.02524358,  0.08362938,
         0.11015203, -0.24173193, -0.05529201, -0.16509656, -0.29620369,
         0.08650509, -0.12164466, -0.03526979,  0.1273927 , -0.16606272,
        -0.30955915, -0.28243949,  0.19462311, -0.19626745, -0.0775108 ,
        -0.16472297, -0.08178328, -0.02293125, -0.07782738,  0.22882003,
         0.16040242,  0.32243686, -0.11047274, -0.12691691, -0.10384123,
        -0.10234581, -0.03565459, -0.02175577, -0.09290734, -0.10020372,
         0.31365528, -0.05551097,  0.17813072, -0.14844389,  0.05517889,
        -0.01707766,  0.12848172, -0.04521652,  0.08131137, -0.08019427,
         0.16627647, -0.08981737, -0.28572761, -0.10227327, -0.02346763,
         0.3446025 , -0.03167949, -0.11196733,  0.0915824 ,  0.03216116,
        -0.14663135, -0.17637674,  0.0218942 ,  0.1

In [10]:
# Fit a default-constructed OMP_Augmented on the training split and predict
# on the held-out split.
# NOTE: this rebinds `model`, which an earlier cell bound to a BOMP instance.
model = OMP_Augmented()
model.fit(X_train, y_train)
prediction = model.predict(X_test)
# Displayed as the cell output; the recorded run shows (120, 1).
prediction.shape

(120, 1)

In [14]:
# Shape of the error_series attribute of the fitted model; the recorded run
# shows (20,).
model.error_series.shape

(20,)

In [12]:
# Shape of the held-out targets; the recorded run shows (120, 1), the same
# shape as `prediction`.
y_test.shape

(120, 1)

In [6]:
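# NOTE: this whole cell is disabled and kept for reference only. It repeats
# the data generation and train/test split of the BOMP cell above, then runs
# GridSearchCV over param_grid and collects the mean CV errors.
# model = BOMP(**fixed_params)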


# n = npm_lst[0][0]
# p = npm_lst[0][1]
# m = npm_lst[0][2]
# noise_level = noise_level_lst[0]
# trial_id = 1

# Data_Geneartor = GaussianDataGenerator(p, n, m, noise_level, trial_id)
# (
#     true_signal,
#     dictionary,
#     true_indices,
#     true_coefficients,
#     perturbed_signal,
# ) = Data_Geneartor.shuffle()
# X_train, X_test, y_train, y_test = train_test_split(
#     dictionary, perturbed_signal, test_size=0.2, random_state=trial_id
# )
# gs = GridSearchCV(
#     model,
#     param_grid,
#     cv=cv_num,
#     scoring="neg_mean_squared_error",
#     n_jobs=-1,
#     verbose=0,
# )
# gs.fit(X_train, y_train)
# cv_err_lst = -gs.cv_results_["mean_test_score"]