In [1]:
import argparse
import yaml
import numpy as np
import pickle as pkl
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
from itertools import product
from datetime import datetime
import json
import hashlib
import os
from algorithms import OMP_Augmented
from data_generation import *


In [7]:
def get_model_params(config):
    """Split the ``MODEL`` section of a config into fixed and grid parameters.

    Only the keys accepted by the OMP model (``K_lst``,
    ``select_atom_percent``, ``random_seed``, ``ignore_warning``) are
    considered; every other key (including ``Bag_lst``) is ignored.
    ``K_lst`` is always returned as a fixed parameter, even though it is a
    list. Any other accepted key goes to ``param_grid`` when its value is a
    list and to ``fixed_params`` otherwise.

    The input mapping is not modified, so the function can be called
    repeatedly on the same config (e.g. when a notebook cell is re-run).

    Args:
        config: Mapping with a ``"MODEL"`` entry that contains at least
            ``"K_lst"``.

    Returns:
        Tuple ``(fixed_params, param_grid)`` of dictionaries.

    Raises:
        KeyError: If ``"MODEL"`` or ``"K_lst"`` is missing.
    """
    OMP_arg_lst = ["K_lst", "select_atom_percent", "random_seed", "ignore_warning"]
    all_params = config["MODEL"]
    K_lst = all_params["K_lst"]

    param_grid = {}
    fixed_params = {}

    for param, value in all_params.items():
        # K_lst is handled separately below so that it is never grid-searched.
        if param == "K_lst" or param not in OMP_arg_lst:
            continue
        if isinstance(value, list):
            param_grid[param] = value
        else:
            fixed_params[param] = value

    fixed_params["K_lst"] = K_lst
    return fixed_params, param_grid

In [8]:
path = "configs/BOMP_300_500_10_nr_0721.yaml"

# The open handle gets its own name so that `path` keeps holding the file
# path after this cell instead of being rebound to a closed file object.
# NOTE(review): yaml.safe_load is the stricter choice if this config could
# ever come from an untrusted source.
with open(path, "r") as config_file:
    configs = yaml.load(config_file, Loader=yaml.FullLoader)


In [9]:
# Display the parsed configuration to check that it loaded as expected.
configs

{'MODEL': {'Bag_lst': [1, 100, 200, 300, 400, 500],
  'K_lst': [1,
   2,
   3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   17,
   18,
   19,
   20,
   21,
   22,
   23,
   24,
   25,
   26,
   27,
   28,
   29,
   30,
   31,
   32,
   33,
   34,
   35,
   36,
   37,
   38,
   39,
   40,
   41,
   42,
   43,
   44,
   45,
   46,
   47,
   48,
   49,
   50,
   51,
   52,
   53,
   54,
   55,
   56,
   57,
   58,
   59,
   60,
   61,
   62,
   63,
   64,
   65,
   66,
   67,
   68,
   69,
   70,
   71,
   72,
   73,
   74,
   75,
   76,
   77,
   78,
   79,
   80,
   81,
   82,
   83,
   84,
   85,
   86,
   87,
   88,
   89,
   90,
   91,
   92,
   93,
   94,
   95,
   96,
   97,
   98,
   99,
   100,
   101,
   102,
   103,
   104,
   105,
   106,
   107,
   108,
   109,
   110,
   111,
   112,
   113,
   114,
   115,
   116,
   117,
   118,
   119,
   120,
   121,
   122,
   123,
   124,
   125,
   126,
   127,
   128,
   129,
   130,
   131,

In [10]:
# Read the experiment settings from the TEST section of the config.
test_section = configs["TEST"]
n_tmp = test_section["n"]
p_tmp = test_section["p"]
m_tmp = test_section["m"]
noise_level_lst = test_section["noise_levels"]
model_name = test_section["model"]
cv_num = test_section["cv_num"]
trial_num = test_section["trial_num"]

# A setting given as a single value is wrapped in a one-element list so that
# all of them can be iterated over in the same way.
n_tmp = n_tmp if isinstance(n_tmp, list) else [n_tmp]
p_tmp = p_tmp if isinstance(p_tmp, list) else [p_tmp]
m_tmp = m_tmp if isinstance(m_tmp, list) else [m_tmp]
noise_level_lst = (
    noise_level_lst if isinstance(noise_level_lst, list) else [noise_level_lst]
)

# Every (n, p, m) combination described by the config.
npm_lst = [*product(n_tmp, p_tmp, m_tmp)]

# Split the MODEL section into fixed arguments and grid-searched ones.
fixed_params, param_grid = get_model_params(configs)

In [11]:
# Display the parameters that get_model_params classified as fixed.
fixed_params

 'random_seed': 1,
 'select_atom_percent': 0,
 'K_lst': [1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  111,
  112,
  113,
  114,
  115,
  116,
  117,
  118,
  119,
  120,
  121,
  122,
  123,
  124,
  125,
  126,
  127,
  128,
  129,
  130,
  131,
  132,
  133,
  134,
  135,
  136,
  137,
  138,
  139,
  140,
  141,
  142,
  143,
  144,
  145,
  146,
  147,
  148,
  149,
  150,
  1

In [15]:
# Display the parameters whose config value was a list (empty when there are none).
param_grid

{}

In [13]:
# Display the noise levels read from the TEST section of the config.
noise_level_lst

[0.12, 0.14, 0.16, 0.18, 0.2]

In [17]:
# Work with the first configured value of n, p and m, and one fixed noise level.
n, p, m = n_tmp[0], p_tmp[0], m_tmp[0]
noise_level = 0.12

In [22]:
# Build the model from the fixed parameters extracted from the config
# (K_lst plus every accepted non-list setting).
model = OMP_Augmented(**fixed_params)

In [23]:
# The same trial_id is given to the data generator and used as the
# random_state of the train/test split.
trial_id = 1
Data_Geneartor = GaussianDataGenerator(p, n, m, noise_level, trial_id)

# shuffle() hands back five items, unpacked here in the order it returns them.
generated_data = Data_Geneartor.shuffle()
true_signal, dictionary, true_indices, true_coefficients, perturbed_signal = generated_data

# Hold out 20% of the rows of the dictionary / perturbed signal for testing.
X_train, X_test, y_train, y_test = train_test_split(
    dictionary,
    perturbed_signal,
    test_size=0.2,
    random_state=trial_id,
)

In [32]:
# Check the shape of the training target (it is a 2-D column, not a 1-D vector).
y_train.shape

(240, 1)

In [24]:
# Fit the model on the training split; the value returned by fit is what this cell displays.
model.fit(X_train, y_train)

(array([-0.02963101,  0.06034284, -0.01860876, -0.04080628,  0.01250148,
        -0.01356313,  0.00942813, -0.07214399,  0.0250147 , -0.01157247,
         0.0088984 , -0.02773164,  0.07344428, -0.12257993, -0.0129917 ,
         0.03183239, -0.05303426,  0.02887242, -0.02819892,  0.0155397 ,
        -0.06475611,  0.09352532, -0.01602831,  0.01976662, -0.01759588,
        -0.11666707,  0.00459419, -0.05259657, -0.08866643, -0.11682676,
        -0.0056279 ,  0.0113685 ,  0.05684153, -0.03253778, -0.0676847 ,
         0.08133003,  0.02002691,  0.00436147,  0.01796147,  0.04667126,
        -0.00418362, -0.06299868,  0.03320004,  0.02114957,  0.06720135,
         0.11071086,  0.04379502, -0.07310846, -0.01963151, -0.03518139,
        -0.01792364, -0.0101306 ,  0.06017013,  0.02726948,  0.02964775,
        -0.06853595,  0.02934064, -0.03059471,  0.01288256, -0.11219385,
         0.02385188, -0.0525922 ,  0.00199547, -0.04378006,  0.01695887,
         0.0322783 , -0.04450347,  0.03605178,  0.0

In [25]:
# Inspect the optimal_k attribute of the fitted model
# (presumably the entry of K_lst it selected — verify against OMP_Augmented).
model.optimal_k

1

In [26]:
# Settings for the step-by-step re-run below: K_lst and the seed come from the
# same fixed_params that were passed to the model, the rest are set by hand.
K_lst, random_seed = fixed_params["K_lst"], fixed_params["random_seed"]
select_atom_percent, atom_weak_select_flag = 0, False

# Start from empty algorithm state.
indices, error_series = [], []
coefficients = coefficients_matrix = None

In [27]:
# phi is the training design matrix and s the training target.
phi, s = X_train, y_train

# One coefficient per column of phi, all zero to begin with.
coefficients = np.zeros(phi.shape[1])
# The residual starts as a copy of the signal; the approximation starts at zero.
r = s.copy()
a = np.zeros_like(s)

In [28]:
# One error value and one coefficient column per entry of K_lst.
error_series = np.zeros(len(K_lst))
coefficients_matrix = np.zeros(shape=(phi.shape[1], error_series.size))

# Seed NumPy's global generator only when a seed was configured.
if random_seed is not None:
    np.random.seed(random_seed)

In [31]:
# Replace the configured K_lst with a short list for this manual run.
# NOTE(review): coefficients_matrix / error_series allocated before this line
# were sized from the previous K_lst and are not resized here.
K_lst = [1,10,20]

In [None]:
# Orthogonal matching pursuit over the sparsity levels listed in K_lst.
#
# All loop state is (re)initialised here so that the cell is idempotent and its
# buffers always match the current K_lst, even if K_lst was changed after the
# buffers were first allocated.

# The target is a (rows, 1) column; subtracting a 1-D projection from it would
# broadcast to a (rows, rows) matrix, so every computation uses a 1-D copy.
target = s.flatten()

indices = []
coefficients_matrix = np.zeros((phi.shape[1], len(K_lst)))
# Levels that are never reached keep an infinite error so argmin cannot pick them.
error_series = np.full(len(K_lst), np.inf)
r = target.copy()

for k in range(np.max(K_lst)):
    inner_products = (phi.T @ r).flatten()
    # so that we will not select the same atom
    inner_products[indices] = 0
    lambda_k = np.argmax(np.abs(inner_products))

    # Ordinary least squares on the atoms chosen so far plus the new one
    X = phi[:, indices + [lambda_k]]

    try:
        betas = np.linalg.inv(X.T @ X) @ X.T @ target
    except np.linalg.LinAlgError:
        print("Singular matrix encountered in OMP")
        break

    # Update indices
    indices.append(lambda_k)


    ## FIXME:: Lazy David found that you can skip determining the optimal k and calculate the error with the whole matrix with the right indexing
    # Update Coefficients
    temp_coefficients_vector = np.zeros(phi.shape[1])
    temp_coefficients_vector[indices] = betas.flatten()
    temp_projection_vector = phi @ temp_coefficients_vector
    temp_residual_vector = target - temp_projection_vector

    # The next atom must be chosen against the current residual, not the
    # original signal.
    r = temp_residual_vector

    if (k+1) in K_lst:
        k_position = K_lst.index(k+1)
        coefficients_matrix[:, k_position] = temp_coefficients_vector
        error_series[k_position] = np.mean(temp_residual_vector**2)

minimal_k_index = np.argmin(error_series)

optimal_k = K_lst[minimal_k_index]

# Update Coefficients

coefficients = coefficients_matrix[:, minimal_k_index]

# Update Projection
a = phi @ coefficients

# Update Residual
r = target - a