In [1]:
import hydra
from omegaconf import DictConfig, OmegaConf
import numpy as np
import pickle as pkl
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
from itertools import product
from datetime import datetime
# from algorithms import BOMP
from data_generation import *

import warnings
warnings.filterwarnings("ignore")

In [2]:

def run_trials_npm_multi_noise_lvl(
    n, p, m, noise_level_lst, model_name, fixed_params, param_grid, cv_num, trial_num
):
    # get the model

    if model_name == "BOMP":
        model = BOMP(**fixed_params)

    res_log_npm = {
        "parameters": {
            "n": n,
            "p": p,
            "m": m,
            "noise_level_lst": noise_level_lst,
            "model_name": model_name,
            "cv_num": cv_num,
            "trial_num": trial_num,
            "param_grid": param_grid,
            "fixed_params": fixed_params,
        },
        "noise_level_lowest_MSE": [],
        "log": [],
    }
    print(f"Running trials for n = {n}, p = {p}, m = {m}")
    for noise_level in noise_level_lst:
        print("Cross validating alpha under noise level: ", noise_level)
        trials_loweset_cv_MSE_temp = []
        trials_testing_score_temp = []
        for trial_id in range(trial_num):
            print("Trial: ", trial_id)
            Data_Geneartor = GaussianDataGenerator(p, n, m, noise_level, trial_id)
            (
                true_signal,
                dictionary,
                true_indices,
                true_coefficients,
                perturbed_signal,
            ) = Data_Geneartor.shuffle()
            X_train, X_test, y_train, y_test = train_test_split(
                dictionary, perturbed_signal, test_size=0.2, random_state=trial_id
            )
            gs = GridSearchCV(
                model,
                param_grid,
                cv=cv_num,
                scoring="neg_mean_squared_error",
                n_jobs=-1,
                verbose=0,
            )
            gs.fit(X_train, y_train)
            cv_err_lst = -gs.cv_results_["mean_test_score"]
            param_lst = gs.cv_results_["params"]
            best_estimator = gs.best_estimator_
            best_estimator.set_bag_lst([best_estimator.optimal_bag])
            best_estimator.set_K_lst([best_estimator.optimal_k])
            best_estimator.fit(X_train, y_train)
            testing_error = mean_squared_error(y_test, best_estimator.predict(X_test))
            trials_testing_score_temp.append(testing_error)
            lowest_cv_error = np.min(cv_err_lst)
            trials_loweset_cv_MSE_temp.append(lowest_cv_error)
            best_params = gs.best_params_
            reslog_one_trial = {
                "noise_level": noise_level,
                "trial": trial_id,
                "cv_error_lst": cv_err_lst,
                "lowest_cv_error": lowest_cv_error,
                "best_params": best_params,
                "best_bag": best_estimator.optimal_bag,
                "best_k": best_estimator.optimal_k,
                "param_lst": param_lst,
                "testing_error": testing_error,
            }
            res_log_npm["log"].append(reslog_one_trial)
            print(
                "Trial: ",
                trial_id,
                " Best params: ",
                best_params,
                " Lowest Error: ",
                lowest_cv_error,
                " Testing Error: ",
                testing_error,
            )
        res_log_npm["noise_level_lowest_cv_MSE"].append(
            np.mean(trials_loweset_cv_MSE_temp)
        )
        res_log_npm["trials_testing_score"].append(np.mean(trials_testing_score_temp))
        print(
            "Noise level: ",
            noise_level,
            " Avg Testing Lowest MSE: ",
            np.mean(trials_testing_score_temp),
        )
    return res_log_npm

In [3]:
def get_model_params(config):
    # all_params = OmegaConf.to_container(config, resolve=True)["MODEL"]
    all_params = config["MODEL"]
    param_grid = {}
    fixed_params = {}

    Bag_lst = all_params["Bag_lst"]
    K_lst = all_params["K_lst"]

    del all_params["Bag_lst"]
    del all_params["K_lst"]

    for param, value in all_params.items():
        if isinstance(value, list):
            param_grid[param] = value
        else:
            fixed_params[param] = value

    fixed_params["Bag_lst"] = Bag_lst
    fixed_params["K_lst"] = K_lst
    return fixed_params, param_grid



In [4]:
configs = {
"MODEL": {
    "Bag_lst": [1, 50, 100],
    "K_lst": [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    "agg_func": "weight",
    "atom_bag_percent": [0.5, 0.6, 0.7, 0.8, 0.9],
    "ignore_warning": True,
    "random_seed": 1,
    "replace_flag": False,
    "select_atom_percent": 0,
    "signal_bag_percent": [0.6, 0.8, 1],
},
"TEST": {
    "cv_num": 5,
    "m": 20,
    "model": "BOMP",
    "n": 600,
    "noise_levels": [0.12,0.14],
    "p": 1000,
    "trial_num": 20,
},
"filename": "BOMP_600_1000_20_nr_0713.yaml",
"hydra": {
    "hydra_logging": {
        "level": "CRITICAL"
    },
    "job_logging": {
        "level": "CRITICAL"
    },
    "run": {
        "dir": "memory/0713/"
    }
}
}

n_tmp = configs["TEST"]["n"]
p_tmp = configs["TEST"]["p"]
m_tmp = configs["TEST"]["m"]
noise_level_lst = configs["TEST"]["noise_levels"]
model_name = configs["TEST"]["model"]
cv_num = configs["TEST"]["cv_num"]
trial_num = configs["TEST"]["trial_num"]

# Get n, p, m, noise_level combinations
if not isinstance(n_tmp, list):
    n_tmp = [n_tmp]
if not isinstance(p_tmp, list):
    p_tmp = [p_tmp]
if not isinstance(m_tmp, list):
    m_tmp = [m_tmp]

npm_lst = list(product(n_tmp, p_tmp, m_tmp))

if not isinstance(noise_level_lst, list):
    noise_level_lst = [noise_level_lst]

# Get model parameters
fixed_params, param_grid = get_model_params(configs)


In [11]:
import numpy as np
from sklearn.base import BaseEstimator

# This file contains classes for different pursuit algorithms


class SignalAtomBagging:
    def __init__(
        self,
        N,
        signal_bag_percent=0.7,
        atom_bag_percent=0.7,
        replace_flag=True,
        random_seed=None,
    ):
        """ "
        This class is used to perform signal bagging

        Args:
        N (int): Number of bootstrap samples
        signal_bag_percent (float): Percentage of the original signal
        replace_flag (bool): Whether to sample with replacement
        random_seed (int): Random
        """
        self.s = None
        self.phi = None
        self.N = N
        self.replace_flag = replace_flag
        self.random_seed = random_seed
        self.signal_bag_percent = signal_bag_percent
        self.atom_bag_percent = atom_bag_percent
        self.s_bag = []
        self.phi_bag = []
        self.col_idx_bag = []

    def fit(self, phi, s):
        """
        Args:
        s (numpy.ndarray): Input signal
        phi (numpy.ndarray): Dictionary
        """

        self.s = s
        self.phi = phi

        num_samples = int(self.signal_bag_percent * self.s.shape[0])
        num_atoms = int(self.atom_bag_percent * self.phi.shape[1])

        if self.random_seed is not None:
            np.random.seed(self.random_seed)

        if self.signal_bag_percent:
            for _ in range(self.N):
                row_indices = np.random.choice(
                    self.s.shape[0], num_samples, replace=self.replace_flag
                )
                col_indices = np.random.choice(
                    self.phi.shape[1], num_atoms, replace=False
                )
                s_tmp = self.s[row_indices]
                phi_tmp = self.phi[row_indices, :][:, col_indices]
                self.s_bag.append(s_tmp)
                self.phi_bag.append(phi_tmp)
                self.col_idx_bag.append(col_indices)
        else:
            self.s_bag = [self.s] * self.N
            for _ in range(self.N):
                col_indices = np.random.choice(
                    self.phi.shape[1], num_atoms, replace=False
                )
                phi_tmp = self.phi[:, col_indices]
                self.phi_bag.append(phi_tmp)
                self.col_idx_bag.append(col_indices)

        return self.s_bag, self.phi_bag, self.col_idx_bag


class AtomBaggingBase(BaseEstimator):
    # Submodel base
    def __init__(
        self,
        K,
        select_atom_percent=0,
        random_seed=0,
        ignore_warning=False,
    ):
        """
        Args:

        This class is used to perform atom bagging
        Each object of this class is a submodel

        K (int): Number of iterations
        atom_bag_percent (float): Percentage of the original dictionary
        select_atom_percent (float): Percentage of the selected atoms
        random_seed (int): Random seed
        """

        self.K = K
        self.select_atom_percent = np.max([0, np.min([1, select_atom_percent])])
        self.atom_weak_select_flag = select_atom_percent > 0

        self.indices = []
        self.s = None
        self.phi = None
        self.a = None
        self.coefficients = None
        self.r = None

        self.random_seed = random_seed
        self.ignore_warning = ignore_warning

    def reset(self):
        self.indices = []
        self.s = None
        self.phi = None
        self.a = None
        self.coefficients = None
        self.r = None

    def fit(self, phi, s):
        return None

    def predict(self, phi_test):
        """
        Args:
        phi_test (numpy.ndarray): Test data

        Returns:
        numpy.ndarray: Predicted output
        """
        return (phi_test @ self.coefficients).reshape(-1, 1)

    def score(self, phi_test, s_test):
        # return self.coefficients
        s_pred = phi_test @ self.coefficients
        pred_mse = np.mean((s_pred - s_test) ** 2)
        return pred_mse

    def input_coefficients(self, coefficients):
        self.coefficients = coefficients

    def update_seed(self, random_seed):
        self.random_seed = random_seed


class AtomBaggingMatchingPursuit(AtomBaggingBase):
    def __init__(self, K, atom_bag_percent=1, select_atom_percent=0, random_seed=0):
        """
        This class is used to perform atom bagging with matching pursuit

        Args:
        K (int): Number of iterations
        atom_bag_percent (float): Percentage of the original dictionary
        select_atom_percent (float): Percentage of the selected atoms
        random_seed (int): Random seed
        """

        super().__init__(K, atom_bag_percent, select_atom_percent, random_seed)

    def fit(self, phi, s):
        """
        Args:
        s (numpy.ndarray): Input signal
        phi (numpy.ndarray): Dictionary
        """
        self.reset()

        if s.ndim == 1:
            self.s = s.reshape(-1, 1)
        else:
            self.s = s
        self.phi = phi
        self.a = np.zeros_like(self.s)
        self.coefficients = np.zeros(phi.shape[1])
        self.r = self.s.copy()

        if self.random_seed is not None:
            np.random.seed(self.random_seed)

        for i in range(self.K):
            inner_products = (phi.T @ self.r).flatten()
            if self.atom_bag_flag:
                dropping_indices = np.random.choice(
                    phi.shape[1],
                    int(phi.shape[1] * (1 - self.atom_bag_percent)),
                    replace=False,
                )
                inner_products[dropping_indices] = 0
            if self.atom_weak_select_flag:
                top_ind = np.argsort(np.abs(inner_products))[::-1][
                    : int(phi.shape[1] * self.select_atom_percent)
                ]
                # randomly select one atom
                lambda_k = np.random.choice(top_ind)
            else:
                lambda_k = np.argmax(np.abs(inner_products))
            self.indices.append(lambda_k)
            self.coefficients[lambda_k] = (
                self.coefficients[lambda_k] + inner_products[lambda_k]
            )
            self.a += inner_products[lambda_k] * phi[:, lambda_k].reshape(-1, 1)
            self.r = self.s - self.a
        return self.a, self.coefficients


class OMP_Augmented(AtomBaggingBase):
    def __init__(
        self, K_lst, select_atom_percent=0, random_seed=None, ignore_warning=False
    ):
        self.K_lst = K_lst
        self.random_seed = random_seed
        self.select_atom_percent = select_atom_percent
        if select_atom_percent == 0:
            self.atom_weak_select_flag = False

        self.indices = []
        self.coefficients = None
        self.ignore_warning = ignore_warning

        self.coefficients_matrix = None
        self.error_series = []

    def fit(self, phi, s):
        """
        Args:
        s (numpy.ndarray): Input signal
        phi (numpy.ndarray): Dictionary
        """
        self.reset()
        self.s = s
        self.phi = phi
        self.a = np.zeros_like(self.s)
        self.coefficients = np.zeros(phi.shape[1])
        self.r = self.s.copy()

        self.coefficients_matrix = np.zeros((phi.shape[1], len(self.K_lst)))
        self.error_series = []
        if self.random_seed is not None:
            np.random.seed(self.random_seed)

        for k in range(np.max(self.K_lst)):
            inner_products = (phi.T @ self.r).flatten()
            # so that we will not select the same atom
            inner_products[self.indices] = 0
            if self.atom_weak_select_flag:
                top_ind = np.argsort(np.abs(inner_products))[::-1][
                    : int(phi.shape[1] * self.select_atom_percent)
                ]
                # randomly select one atom
                lambda_k = np.random.choice(top_ind)
            else:
                lambda_k = np.argmax(np.abs(inner_products))

            # Ordinary least squares
            X = phi[:, self.indices + [lambda_k]]

            try:
                betas = np.linalg.inv(X.T @ X) @ X.T @ self.s
            except:
                if not self.ignore_warning:
                    print("Singular matrix encountered in OMP")
                break

            # Update indices
            self.indices.append(lambda_k)

            # Update Coefficients
            self.coefficients = np.zeros(phi.shape[1])
            self.coefficients[self.indices] = betas.flatten()

            # Update Projection
            self.a = X @ betas

            # Update Residual
            self.r = self.s - self.a
            if (k+1) in self.K_lst:
                self.coefficients_matrix[:, self.K_lst.index(k+1)] = self.coefficients
                self.error_series.append(np.sum(self.r**2))

        minimal_k_index = np.argmin(self.error_series)

        # Update Coefficients

        self.coefficients = self.coefficients_matrix[:, minimal_k_index]

        # Update Projection
        self.a = phi @ self.coefficients

        # Update Residual
        self.r = self.s - self.a

        return self.a, self.coefficients

    def multi_score(self, phi_test, s_test):
        """
        Args:
        phi_test (numpy.ndarray): Test data
        s_test (numpy.ndarray): Test labels

        Returns:
        numpy.ndarray: Predicted output
        """

        test_score = []
        projection_matrix = phi_test @ self.coefficients_matrix
        residual_matrix = s_test.reshape(-1, 1) - projection_matrix
        test_score = np.mean(residual_matrix**2, axis=0)
        return test_score

    def reset(self):
        super().reset()
        self.coefficients_matrix = None
        self.error_series = []


class BOMP(AtomBaggingBase):
    def __init__(
        self,
        Bag_lst= list(range(1,11)),
        K_lst = list(range(1, 11)),
        signal_bag_percent=0.7,
        atom_bag_percent=1,
        select_atom_percent=0,
        replace_flag=True,
        agg_func="weight",
        random_seed=None,
        ignore_warning=False,
    ):
        """
        Args:
        N (int): Number of submodels
        K (int): Number of iterations
        signal_bag_percent (float): Percentage of the original signal
        atom_bag_percent (float): Percentage of the original dictionary
        select_atom_percent (float): Percentage of the selected atoms
        replace_flag (bool): Whether to replace the samples
        agg_func (str): Aggregation function
        random_seed (int): Random seed
        """

        self.Bag_lst = Bag_lst
        self.K_lst = K_lst
        self.signal_bag_percent = signal_bag_percent
        self.atom_bag_percent = atom_bag_percent
        self.select_atom_percent = select_atom_percent
        self.replace_flag = replace_flag
        self.agg_func = agg_func
        self.random_seed = random_seed
        self.ignore_warning = ignore_warning
        self.s = None
        self.phi = None
        self.tmpPursuitModel = OMP_Augmented(
            K_lst, select_atom_percent, random_seed, ignore_warning
        )
        self.SignalBagging = None
        self.coefficients = None
        self.a = None

    def agg_weight_with_error(self, c_lst, mse_lst):
        """
        This function is used to aggregate the coefficients with the inverse of the mean squared error

        Args:
        c_lst (list): List of coefficients
        mse_lst (list): List of mean squared errors
        """
        # Calculate the weight
        mse_lst = np.array(mse_lst)
        weight = 1 / mse_lst
        weight = weight / np.sum(weight)

        # Calculate the weighted average
        tot = np.zeros_like(c_lst[0])
        for i in range(len(c_lst)):
            tot += c_lst[i] * weight[i]
        return tot

    def agg_weight_with_avg(self, c_lst):
        """
        This function is used to aggregate the coefficients with the inverse of the mean squared error

        Args:
        c_lst (list): List of coefficients
        """
        # Calculate the weighted average
        tot = np.zeros_like(c_lst[0])
        for i in range(len(c_lst)):
            tot += c_lst[i]
        return tot / len(c_lst)

    def fit(self, phi, s):
        """
        Args:
        s (numpy.ndarray): Input signal
        phi (numpy.ndarray): Dictionary
        """

        self.reset()

        self.s = s
        self.phi = phi
        self.SignalBagging = SignalAtomBagging(
            np.max(self.Bag_lst),
            self.signal_bag_percent,
            self.atom_bag_percent,
            self.replace_flag,
            self.random_seed,
        )
        self.SignalBagging.fit(self.phi, self.s)
        self.coefficients_matrix = None
        s_bag = self.SignalBagging.s_bag
        phi_bag = self.SignalBagging.phi_bag
        col_idx_bag = self.SignalBagging.col_idx_bag
        self.coefficients_cubic = np.zeros((np.max(self.Bag_lst), phi.shape[1], len(self.K_lst)))
        self.coefficients_matrix = np.zeros((phi.shape[1], len(self.K_lst)))
        self.bag_k_error_matrix = np.zeros((len(self.Bag_lst), 3))


        if self.random_seed is not None:
            np.random.seed(self.random_seed)

        for i in range(np.max(self.Bag_lst)):
            sub_s = s_bag[i]
            sub_phi = phi_bag[i]
            sub_idx = col_idx_bag[i]
            self.tmpPursuitModel = OMP_Augmented(
                self.K_lst,
                self.select_atom_percent,
                np.random.randint(10 * np.max(self.Bag_lst)),
                self.ignore_warning,
            )
            self.tmpPursuitModel.fit(sub_phi, sub_s)
            real_sub_coefficients = np.zeros((phi.shape[1], len(self.K_lst)))
            real_sub_coefficients[sub_idx, :] = self.tmpPursuitModel.coefficients_matrix
            self.coefficients_cubic[i,:,:] = real_sub_coefficients
            self.tmpPursuitModel.reset()
            if (i+1) in self.Bag_lst:
                counted_array = np.array(
                    np.unique(np.concatenate(col_idx_bag[: i + 1]), return_counts=True)
                )
                temp_coefficients_matrix = self.coefficients_cubic.sum(axis=0)
                counted_array = counted_array[:,np.argsort(counted_array[0])]
                filled_array = np.zeros_like(phi[0])

                if (counted_array.shape[1] < phi.shape[1]):
                    temp_coefficients_matrix[counted_array[0, :], :] = ((temp_coefficients_matrix[counted_array[0, :], :]).T/ counted_array[1, :]).T
                else:
                    temp_coefficients_matrix = ((temp_coefficients_matrix).T/ counted_array[1, :]).T
                temp_projection_matrix = phi @ temp_coefficients_matrix
                temp_residual_matrix = s.reshape(-1, 1) - temp_projection_matrix
                temp_error_series = np.mean(temp_residual_matrix ** 2, axis=0)
                temp_optimal_idx = np.argmin(temp_error_series)
                self.bag_k_error_matrix[self.Bag_lst.index(i+1), :] = np.array([i+1, self.K_lst[temp_optimal_idx], temp_error_series[temp_optimal_idx]])

        self.optimal_idx = np.argmin(self.bag_k_error_matrix[:, 2])

        self.optimal_k = int(self.bag_k_error_matrix[self.optimal_idx, 1])

        self.optimal_bag = int(self.bag_k_error_matrix[self.optimal_idx, 0])


        counted_array = np.array(
            np.unique(np.concatenate(col_idx_bag[: self.optimal_bag]), return_counts=True)
        )
        temp_coefficients_matrix = self.coefficients_cubic.sum(axis=0)
        counted_array = counted_array[:,np.argsort(counted_array[0])]
        filled_array = np.zeros_like(phi[0])
        if (counted_array.shape[1] < phi.shape[1]):
            self.coefficients_matrix[counted_array[0, :], :] = ((temp_coefficients_matrix[counted_array[0, :], :]).T/ counted_array[1, :]).T
        else:
            self.coefficients_matrix = ((temp_coefficients_matrix).T/ counted_array[1, :]).T

        self.coefficients = self.coefficients_matrix[:, self.K_lst.index(self.optimal_k)]

        # Update Projection
        self.a = phi @ self.coefficients

        # Update Residual
        self.r = self.s - self.a
        return self.a, self.coefficients

    def reset(self):
        """
        This function is used to reset the model
        """
        super().reset()
        self.coefficients_matrix = None
        self.coefficients_cubic = None
        self.error_series = []
        self.coefficients = None
        self.a = None

    def set_Bag_lst(self, bag_lst):
        """
        This function is used to set the bag_lst

        Args:
        bag_lst (list): List of bag size
        """
        self.Bag_lst = bag_lst
    
    def set_K_lst(self, k_lst):
        self.K_lst = k_lst

    def get_params(self, deep=True):
    # This assumes all parameters are primitives
        return {
            "Bag_lst": self.Bag_lst,
            "K_lst": self.K_lst,
            "signal_bag_percent": self.signal_bag_percent,
            "atom_bag_percent": self.atom_bag_percent,
            "select_atom_percent": self.select_atom_percent,
            "replace_flag": self.replace_flag,
            "agg_func": self.agg_func,
            "random_seed": self.random_seed,
            "ignore_warning": self.ignore_warning,
        }

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

In [None]:
test_array = np.arange(100)
np.random.shuffle(test_array)
chosen = test_array[:60]
counted_array = np.array(
    np.unique(chosen, return_counts=True)
)
# Create a new array filled with zeros
filled_array = np.zeros_like(test_array)
# Put the counted values with their corresponding index in the new array
filled_array[counted_array[0]] = counted_array[1]
non_zero_idx = counted_array[0]


In [12]:
model = BOMP(**fixed_params)


n = npm_lst[0][0]
p = npm_lst[0][1]
m = npm_lst[0][2]
noise_level = noise_level_lst[0]
trial_id = 1

Data_Geneartor = GaussianDataGenerator(p, n, m, noise_level, trial_id)
(
    true_signal,
    dictionary,
    true_indices,
    true_coefficients,
    perturbed_signal,
) = Data_Geneartor.shuffle()
X_train, X_test, y_train, y_test = train_test_split(
    dictionary, perturbed_signal, test_size=0.2, random_state=trial_id
)
temp = BOMP(atom_bag_percent=0.5,signal_bag_percent=0.6)
temp.fit(X_train, y_train)

(array([ 3.81511933e-02,  1.01722459e-01, -3.00705959e-01, -2.57130683e-01,
        -4.31151317e-01, -6.76168481e-02,  5.90171082e-02, -8.35489350e-02,
        -4.02525978e-02, -2.72826389e-01,  1.00383045e-01, -2.38160455e-02,
        -5.76591471e-02,  7.73288011e-02,  8.95581979e-02,  1.79916256e-01,
        -1.98328507e-01, -2.56895552e-02, -1.59716271e-01, -4.52044746e-01,
        -6.86218994e-04, -1.95897634e-01, -1.13165087e-01,  1.48071296e-01,
        -2.54993490e-01, -3.15419133e-01, -2.31398154e-01,  1.89817758e-01,
        -2.46644257e-01, -1.28533603e-01, -1.09594686e-01, -1.32967121e-01,
         1.37834383e-02,  2.34380424e-03,  3.47138167e-01,  2.40401239e-01,
         3.25598053e-01, -1.30954191e-01, -1.56820863e-01, -1.31206749e-01,
        -1.53254947e-01, -1.55380209e-01,  2.59314590e-02, -1.14410030e-01,
        -1.46879773e-01,  3.61213519e-01, -8.66151012e-02,  1.66836982e-01,
        -2.20509239e-01,  5.48171310e-03, -1.43460921e-02,  2.26123754e-01,
        -8.6

In [13]:
model = BOMP(**fixed_params)


n = npm_lst[0][0]
p = npm_lst[0][1]
m = npm_lst[0][2]
noise_level = noise_level_lst[0]
trial_id = 1

Data_Geneartor = GaussianDataGenerator(p, n, m, noise_level, trial_id)
(
    true_signal,
    dictionary,
    true_indices,
    true_coefficients,
    perturbed_signal,
) = Data_Geneartor.shuffle()
X_train, X_test, y_train, y_test = train_test_split(
    dictionary, perturbed_signal, test_size=0.2, random_state=trial_id
)
gs = GridSearchCV(
    model,
    param_grid,
    cv=cv_num,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    verbose=1,
)
gs.fit(X_train, y_train)
cv_err_lst = -gs.cv_results_["mean_test_score"]

Fitting 5 folds for each of 15 candidates, totalling 75 fits


In [15]:
temp = gs.best_estimator_
temp.coefficients.shape
-gs.cv_results_["mean_test_score"]

array([2.00818286e-02, 1.94018281e-02, 8.67035617e+01, 2.04968373e-02,
       2.01437268e-02, 1.25108641e+02, 2.07596826e-02, 2.22894233e-02,
       1.33960626e+02, 1.91418538e-02, 2.15422021e-02, 2.51623050e-02,
       1.82988671e-02, 2.04391895e-02, 3.49850498e-02])

In [None]:
temp