In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os.path as osp
import sys


def add_path(path):
    if path not in sys.path:
        sys.path.insert(0, path)


this_dir = osp.dirname('../../train.py')

lib_path = osp.join(this_dir, 'lib')
add_path(lib_path)

In [2]:
from utils.dataloader import CERN_Dataset_V3, UMASS_Dataset_V2, DRED_Dataset
from torch.utils.data import DataLoader
from sklearn.decomposition import DictionaryLearning
from torch import nn
from sklearn.decomposition import PCA
import numpy as np
import torch
import matplotlib.pyplot as plt
from scipy.fftpack import dct, idct, fft, ifft
import pywt

### Build Datasets (freeze to 12 days)

In [3]:
cer_val_fc = CERN_Dataset_V3(train=False, mode = 'fc', no_days = 12)
cer_val_cnn = CERN_Dataset_V3(train=False, mode = 'cnn', no_days = 12)
umass_val_fc = UMASS_Dataset_V2(train=False, mode = 'fc', no_days = 12)
umass_val_cnn = UMASS_Dataset_V2(train=False, mode = 'cnn', no_days = 12)
dred_val_fc = DRED_Dataset(train = False, mode = 'fc', no_rows = 60,
                          wide_freq = 60)
dred_val_cnn = DRED_Dataset(train = False, mode = 'cnn', no_rows = 60,
                          wide_freq = 60, reshape_factor = 1)

100%|██████████| 200/200 [00:00<00:00, 261.14it/s]
100%|██████████| 200/200 [00:00<00:00, 4703.11it/s]
100%|██████████| 10/10 [00:00<00:00, 1482.09it/s]
100%|██████████| 10/10 [00:00<00:00, 1333.05it/s]


DRED processed.
DRED processed.


In [4]:
cer_test = cer_val_fc[0][0].numpy()
umass_test = umass_val_fc[0][0].numpy()
dred_test = dred_val_fc[0][0].numpy()

In [5]:
cer_test.shape

(576,)

In [6]:
umass_test.shape

(1152,)

In [7]:
dred_test.shape

(3600,)

In [8]:
required_ratios = [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 18, 21, 30, 41]

In [12]:
loss = nn.MSELoss()

### PCA

$C_r = \dfrac{m \times n}{(m\times r+r + n)}$

In [21]:
def pca_compression_solver(m, n, c):
    
    """Check r to solve for required compression ratio
    """
    
    return (1/(m+1)) * (m*n/c - n)

In [46]:
cer_number_of_comp = [max(int(np.floor(pca_compression_solver(12, 48, c))), 0) for c in required_ratios]
umass_number_of_comp = [max(int(np.floor(pca_compression_solver(12, 96, c))), 0) for c in required_ratios]
dred_number_of_comp = [max(int(np.floor(pca_compression_solver(60, 60, c))), 0) for c in required_ratios]

In [12]:
pca_dataset = cer_val_fc.cern_data.cpu().numpy()
pca_dataset = pca_dataset.reshape(-1, 48)
total_losses = {k: [] for k in cer_number_of_comp}
no_days = 12

for k in cer_number_of_comp:
    print('num_components: {}'.format(k))
    for record_no in range(pca_dataset.shape[0] // (no_days)):
        pca = PCA(min(k, no_days), svd_solver='auto')
        current_record = pca_dataset[(record_no * no_days):((record_no + 1) * no_days), :]
        # print(current_record.shape)

        pca_test_compressed = pca.fit_transform(current_record)
        # print(pca_test_compressed.shape)
        pca_test_uncompressed = pca.inverse_transform(pca_test_compressed)
        pca_test_uncompressed = np.clip(pca_test_uncompressed, 0, 1) # clip

        # print(pca_test_uncompressed.shape)
        # break
        if (record_no + 1) % 20000 == 0: print('record: {}'.format(record_no))
        total_losses[k].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(pca_test_uncompressed)).item())
    
for k, ratio in zip(cer_number_of_comp, required_ratios):
    
    mse_avg_ = sum(total_losses[k]) / len(total_losses[k])
    
    print('k: {}, ratio: {}, mse average: {:.6f}'.format(k, ratio, mse_avg_))

num_components: 18
num_components: 11
num_components: 7
num_components: 3
num_components: 2
num_components: 1
num_components: 1
num_components: 0
num_components: 0
num_components: 0
num_components: 0
num_components: 0
num_components: 0
num_components: 0
num_components: 0
num_components: 0
k: 18, ratio: 2, mse average: 0.000000
k: 11, ratio: 3, mse average: 0.000000
k: 7, ratio: 4, mse average: 0.000925
k: 3, ratio: 6, mse average: 0.004519
k: 2, ratio: 7, mse average: 0.006436
k: 1, ratio: 8, mse average: 0.009313
k: 1, ratio: 9, mse average: 0.009313
k: 0, ratio: 10, mse average: 0.015562
k: 0, ratio: 11, mse average: 0.015562
k: 0, ratio: 12, mse average: 0.015562
k: 0, ratio: 13, mse average: 0.015562
k: 0, ratio: 15, mse average: 0.015562
k: 0, ratio: 18, mse average: 0.015562
k: 0, ratio: 21, mse average: 0.015562
k: 0, ratio: 30, mse average: 0.015562
k: 0, ratio: 41, mse average: 0.015562


In [14]:
pca_dataset = umass_val_fc.umass_data.cpu().numpy()
pca_dataset = pca_dataset.reshape(-1, 96)
total_losses = {k: [] for k in umass_number_of_comp}
no_days = 12

for k in umass_number_of_comp:
    print('num_components: {}'.format(k))
    for record_no in range(pca_dataset.shape[0] // (no_days)):
        pca = PCA(min(k, 12), svd_solver='auto')
        current_record = pca_dataset[(record_no * no_days):((record_no + 1) * no_days), :]
        # print(current_record.shape)

        pca_test_compressed = pca.fit_transform(current_record)
        # print(pca_test_compressed.shape)
        pca_test_uncompressed = pca.inverse_transform(pca_test_compressed)
        pca_test_uncompressed = np.clip(pca_test_uncompressed, 0, 1) # clip

        # print(pca_test_uncompressed.shape)
        # break
        if (record_no + 1) % 20000 == 0: print('record: {}'.format(record_no))
        total_losses[k].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(pca_test_uncompressed)).item())
    
for k, ratio in zip(umass_number_of_comp, required_ratios):
    
    mse_avg_ = sum(total_losses[k]) / len(total_losses[k])
    
    print('k: {}, ratio: {}, mse average: {:.6f}'.format(k, ratio, mse_avg_))

num_components: 36
num_components: 22
num_components: 14
num_components: 7
num_components: 5
num_components: 3
num_components: 2
num_components: 1
num_components: 0
num_components: 0
num_components: 0
num_components: 0
num_components: 0
num_components: 0
num_components: 0
num_components: 0
k: 36, ratio: 2, mse average: 0.000000
k: 22, ratio: 3, mse average: 0.000000
k: 14, ratio: 4, mse average: 0.000000
k: 7, ratio: 6, mse average: 0.002602
k: 5, ratio: 7, mse average: 0.005043
k: 3, ratio: 8, mse average: 0.008548
k: 2, ratio: 9, mse average: 0.010891
k: 1, ratio: 10, mse average: 0.014083
k: 0, ratio: 11, mse average: 0.019295
k: 0, ratio: 12, mse average: 0.019295
k: 0, ratio: 13, mse average: 0.019295
k: 0, ratio: 15, mse average: 0.019295
k: 0, ratio: 18, mse average: 0.019295
k: 0, ratio: 21, mse average: 0.019295
k: 0, ratio: 30, mse average: 0.019295
k: 0, ratio: 41, mse average: 0.019295


In [43]:
pca_dataset = dred_val_fc.dred_data.cpu().numpy()
pca_dataset = pca_dataset.reshape(-1, 60)
total_losses = {k: [] for k in dred_number_of_comp}
no_days = 60

for k in dred_number_of_comp:
    print('num_components: {}'.format(k))
    for record_no in range(pca_dataset.shape[0] // (no_days)):
        pca = PCA(min(k, 12), svd_solver='auto')
        current_record = pca_dataset[(record_no * no_days):((record_no + 1) * no_days), :]
        # print(current_record.shape)

        pca_test_compressed = pca.fit_transform(current_record)
        # print(pca_test_compressed.shape)
        pca_test_uncompressed = pca.inverse_transform(pca_test_compressed)
        pca_test_uncompressed = np.clip(pca_test_uncompressed, 0, 1) # clip

        # print(pca_test_uncompressed.shape)
        # break
        if (record_no + 1) % 20000 == 0: print('record: {}'.format(record_no))
        total_losses[k].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(pca_test_uncompressed)).item())
    
for k, ratio in zip(dred_number_of_comp, required_ratios):
    
    mse_avg_ = sum(total_losses[k]) / len(total_losses[k])
    
    print('k: {}, ratio: {}, mse average: {:.6f}'.format(k, ratio, mse_avg_))

num_components: 28
num_components: 18
num_components: 13
num_components: 8
num_components: 7
num_components: 6
num_components: 5
num_components: 4
num_components: 4
num_components: 3
num_components: 3
num_components: 2
num_components: 2
num_components: 1
num_components: 0
num_components: 0
k: 28, ratio: 2, mse average: 0.000000
k: 18, ratio: 3, mse average: 0.000000
k: 13, ratio: 4, mse average: 0.000000
k: 8, ratio: 6, mse average: 0.000001
k: 7, ratio: 7, mse average: 0.000002
k: 6, ratio: 8, mse average: 0.000005
k: 5, ratio: 9, mse average: 0.000014
k: 4, ratio: 10, mse average: 0.000045
k: 4, ratio: 11, mse average: 0.000045
k: 3, ratio: 12, mse average: 0.000166
k: 3, ratio: 13, mse average: 0.000166
k: 2, ratio: 15, mse average: 0.000682
k: 2, ratio: 18, mse average: 0.000682
k: 1, ratio: 21, mse average: 0.003953
k: 0, ratio: 30, mse average: 0.091783
k: 0, ratio: 41, mse average: 0.091783


Draw

In [15]:
# fig, axs = plt.subplots(2, 2, figsize=(12,10))

# for k, ax in zip(number_of_components, axs.reshape(-1)):
#     pca = PCA(k)
#     current_record = test_record.reshape(12, 48)
#     pca_test_compressed = pca.fit_transform(current_record)
#     pca_test_uncompressed = np.clip(pca.inverse_transform(pca_test_compressed), 0, 1)
#     temp_loss_ = loss(torch.from_numpy(current_record), 
#                                 torch.from_numpy(pca_test_uncompressed)).item()
#     ax.plot(pca_test_uncompressed.reshape(-1), 'r', label=k)
#     ax.plot(current_record.reshape(-1), 'b--', alpha=1)
#     ax.set_title('num_components {}, mse:{:.4f}'.format(k, temp_loss_))
# plt.show()

### FFT

In [28]:
cer_fft_coefficients_to_keep = [int(12 * 48 / k) for k in required_ratios]
umass_fft_coefficients_to_keep = [int(12 * 96 / k) for k in required_ratios]
dred_fft_coefficients_to_keep = [int(60 * 60 / k) for k in required_ratios]

In [17]:
cer_fft_coefficients_to_keep

[288, 192, 144, 96, 82, 72, 64, 57, 52, 48, 44, 38, 32, 27, 19, 14]

In [18]:
fft_dataset = cer_val_fc.cern_data.cpu().numpy()
fft_total_losses = {c: [] for c in cer_fft_coefficients_to_keep}

for c in cer_fft_coefficients_to_keep:
    for record_no in range(len(fft_dataset)):
        
        # run fft
        current_record = fft_dataset[record_no]
        compressed_fft = fft(current_record)
        
        # remove smallest magnitude
        c_sort = np.sort(compressed_fft)[::-1]
        thresh = c_sort[c]
        compressed_fft[compressed_fft <= thresh] = 0
        
        # uncompress
        uncompressed_fft = np.clip(ifft(compressed_fft).real, 0, 1)
        # uncompressed_fft = ifft(compressed_fft).real
        # uncompressed_fft = (uncompressed_fft - min(uncompressed_fft)) / (max(uncompressed_fft) - min(uncompressed_fft))
        
        # work out diff
        fft_total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_fft)).item())
        
for c, ratio in zip(cer_fft_coefficients_to_keep, required_ratios):
    
    mse_avg_ = sum(fft_total_losses[c]) / len(fft_total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))

coeffs: 288, ratio: 2, mse average: 0.014828
coeffs: 192, ratio: 3, mse average: 0.016090
coeffs: 144, ratio: 4, mse average: 0.016890
coeffs: 96, ratio: 6, mse average: 0.017923
coeffs: 82, ratio: 7, mse average: 0.018301
coeffs: 72, ratio: 8, mse average: 0.018606
coeffs: 64, ratio: 9, mse average: 0.018883
coeffs: 57, ratio: 10, mse average: 0.019167
coeffs: 52, ratio: 11, mse average: 0.019350
coeffs: 48, ratio: 12, mse average: 0.019535
coeffs: 44, ratio: 13, mse average: 0.019727
coeffs: 38, ratio: 15, mse average: 0.020029
coeffs: 32, ratio: 18, mse average: 0.020406
coeffs: 27, ratio: 21, mse average: 0.020792
coeffs: 19, ratio: 30, mse average: 0.021525
coeffs: 14, ratio: 41, mse average: 0.022015


In [19]:
fft_dataset = umass_val_fc.umass_data.cpu().numpy()
fft_total_losses = {c: [] for c in umass_fft_coefficients_to_keep}

for c in umass_fft_coefficients_to_keep:
    for record_no in range(len(fft_dataset)):
        
        # run fft
        current_record = fft_dataset[record_no]
        compressed_fft = fft(current_record)
        
        # remove smallest magnitude
        c_sort = np.sort(compressed_fft)[::-1]
        thresh = c_sort[c]
        compressed_fft[compressed_fft <= thresh] = 0
        
        # uncompress
        uncompressed_fft = np.clip(ifft(compressed_fft).real, 0, 1)
        # uncompressed_fft = ifft(compressed_fft).real
        # uncompressed_fft = (uncompressed_fft - min(uncompressed_fft)) / (max(uncompressed_fft) - min(uncompressed_fft))
        
        # work out diff
        fft_total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_fft)).item())
        
for c, ratio in zip(umass_fft_coefficients_to_keep, required_ratios):
    
    mse_avg_ = sum(fft_total_losses[c]) / len(fft_total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))

coeffs: 576, ratio: 2, mse average: 0.009925
coeffs: 384, ratio: 3, mse average: 0.011724
coeffs: 288, ratio: 4, mse average: 0.012855
coeffs: 192, ratio: 6, mse average: 0.014297
coeffs: 164, ratio: 7, mse average: 0.014790
coeffs: 144, ratio: 8, mse average: 0.015141
coeffs: 128, ratio: 9, mse average: 0.015534
coeffs: 115, ratio: 10, mse average: 0.015828
coeffs: 104, ratio: 11, mse average: 0.016096
coeffs: 96, ratio: 12, mse average: 0.016295
coeffs: 88, ratio: 13, mse average: 0.016492
coeffs: 76, ratio: 15, mse average: 0.016838
coeffs: 64, ratio: 18, mse average: 0.017205
coeffs: 54, ratio: 21, mse average: 0.017535
coeffs: 38, ratio: 30, mse average: 0.018152
coeffs: 28, ratio: 41, mse average: 0.018625


In [29]:
fft_dataset = dred_val_fc.dred_data.cpu().numpy()
fft_total_losses = {c: [] for c in dred_fft_coefficients_to_keep}

for c in dred_fft_coefficients_to_keep:
    for record_no in range(len(fft_dataset)):
        
        # run fft
        current_record = fft_dataset[record_no]
        compressed_fft = fft(current_record)
        
        # remove smallest magnitude
        c_sort = np.sort(compressed_fft)[::-1]
        thresh = c_sort[c]
        compressed_fft[compressed_fft <= thresh] = 0
        
        # uncompress
        uncompressed_fft = np.clip(ifft(compressed_fft).real, 0, 1)
        # uncompressed_fft = ifft(compressed_fft).real
        # uncompressed_fft = (uncompressed_fft - min(uncompressed_fft)) / (max(uncompressed_fft) - min(uncompressed_fft))
        
        # work out diff
        fft_total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_fft)).item())
        
for c, ratio in zip(dred_fft_coefficients_to_keep, required_ratios):
    
    mse_avg_ = sum(fft_total_losses[c]) / len(fft_total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))

coeffs: 1800, ratio: 2, mse average: 0.043205
coeffs: 1200, ratio: 3, mse average: 0.043213
coeffs: 900, ratio: 4, mse average: 0.043218
coeffs: 600, ratio: 6, mse average: 0.043227
coeffs: 514, ratio: 7, mse average: 0.043230
coeffs: 450, ratio: 8, mse average: 0.043236
coeffs: 400, ratio: 9, mse average: 0.043240
coeffs: 360, ratio: 10, mse average: 0.043244
coeffs: 327, ratio: 11, mse average: 0.043265
coeffs: 300, ratio: 12, mse average: 0.043270
coeffs: 276, ratio: 13, mse average: 0.043277
coeffs: 240, ratio: 15, mse average: 0.043296
coeffs: 200, ratio: 18, mse average: 0.043340
coeffs: 171, ratio: 21, mse average: 0.043437
coeffs: 120, ratio: 30, mse average: 0.043621
coeffs: 87, ratio: 41, mse average: 0.043865


In [20]:
# fig, axs = plt.subplots(2, 2, figsize=(12,10))

# for c, ax in zip(fft_coefficients_to_keep, axs.reshape(-1)):
#     current_record = test_record.reshape(-1)
#     compressed_fft = fft(current_record)

#     c_sort = np.sort(compressed_fft)[::-1]
#     thresh = c_sort[c]
#     compressed_fft[compressed_fft <= thresh] = 0
    
#     uncompressed_fft = np.clip(ifft(compressed_fft).real, 0, 1)
#     temp_loss_ = loss(torch.from_numpy(current_record), 
#                                 torch.from_numpy(uncompressed_fft)).item()
#     ax.plot(uncompressed_fft.reshape(-1), 'r', label=k)
#     ax.plot(current_record.reshape(-1), 'b--', alpha=1)
#     ax.set_title('coeffs_kept {}, mse: {:.4f}'.format(c, temp_loss_))
# plt.show()

### SVD

$C_r = \dfrac{m \times n}{(n-p)(m+n+1)}$

In [21]:
def ksvd_cratio_calc(m, n, c):
    
    p = n - (c)/(m*n*(m+n+1))
    
    return p
# MOVE THIS

In [22]:
required_ratios

[2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 18, 21, 30, 41]

### K-SVD

Paper link: http://webx.ubi.pt/~catalao/09334900.pdf. Not too useful for us as we aim to achieve a pre-determined compression ratio.

Paper link: https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7741464. 

In [31]:
def solve_ksvd_ratios(rows, cols, cr):
    
    return rows / cr - rows / cols

In [41]:
components_cer = [int(max(np.floor(solve_ksvd_ratios(12, 48, i)), 1) )for i in required_ratios]
components_umass = [int(max(np.floor(solve_ksvd_ratios(12, 96, i)), 1) )for i in required_ratios]
components_dred = [int(max(np.floor(solve_ksvd_ratios(60, 60, i)), 1) )for i in required_ratios]

In [26]:
pca_dataset = cer_val_fc.cern_data.cpu().numpy()
pca_dataset = pca_dataset.reshape(-1, 48)
total_losses = {c: [] for c in components_cer}
no_days = 12

for c in components_cer:
    print(c)
    for record_no in range((pca_dataset.shape[0] // (no_days))):

        current_record = pca_dataset[(record_no * no_days):((record_no + 1) * no_days), :]
        dictionarylearner = DictionaryLearning(c, transform_algorithm = 'omp', 
                                       max_iter = 60, transform_n_nonzero_coefs=1)
        compressed_ksvd = dictionarylearner.fit_transform(current_record)
        
        # uncompress
        uncompressed_ksvd = compressed_ksvd @ dictionarylearner.components_
        
        # work out diff
        total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_ksvd)).item())
        
for c, ratio in zip(components_cer, required_ratios):
    
    mse_avg_ = sum(total_losses[c]) / len(total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))
        

5


  copy_Xy=copy_cov,


3


  copy_Xy=copy_cov,


2


  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


coeffs: 5, ratio: 2, mse average: 0.005642
coeffs: 3, ratio: 3, mse average: 0.008403
coeffs: 2, ratio: 4, mse average: 0.010505
coeffs: 1, ratio: 6, mse average: 0.013974
coeffs: 1, ratio: 7, mse average: 0.013974
coeffs: 1, ratio: 8, mse average: 0.013974
coeffs: 1, ratio: 9, mse average: 0.013974
coeffs: 1, ratio: 10, mse average: 0.013974
coeffs: 1, ratio: 11, mse average: 0.013974
coeffs: 1, ratio: 12, mse average: 0.013974
coeffs: 1, ratio: 13, mse average: 0.013974
coeffs: 1, ratio: 15, mse average: 0.013974
coeffs: 1, ratio: 18, mse average: 0.013974
coeffs: 1, ratio: 21, mse average: 0.013974
coeffs: 1, ratio: 30, mse average: 0.013974
coeffs: 1, ratio: 41, mse average: 0.013974


In [27]:
pca_dataset = umass_val_fc.umass_data.cpu().numpy()
pca_dataset = pca_dataset.reshape(-1, 96)
total_losses = {c: [] for c in components_umass}
no_days = 12

for c in components_umass:
    print(c)
    for record_no in range((pca_dataset.shape[0] // (no_days))):

        current_record = pca_dataset[(record_no * no_days):((record_no + 1) * no_days), :]
        dictionarylearner = DictionaryLearning(c, transform_algorithm = 'omp', 
                                       max_iter = 60, transform_n_nonzero_coefs=1)
        compressed_ksvd = dictionarylearner.fit_transform(current_record)
        
        # uncompress
        uncompressed_ksvd = compressed_ksvd @ dictionarylearner.components_
        
        # work out diff
        total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_ksvd)).item())
        
for c, ratio in zip(components_umass, required_ratios):
    
    mse_avg_ = sum(total_losses[c]) / len(total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))
        

5
3
2
1
1
1
1
1
1
1
1
1
1
1
1
1
coeffs: 5, ratio: 2, mse average: 0.008404
coeffs: 3, ratio: 3, mse average: 0.011983
coeffs: 2, ratio: 4, mse average: 0.014334
coeffs: 1, ratio: 6, mse average: 0.017552
coeffs: 1, ratio: 7, mse average: 0.017552
coeffs: 1, ratio: 8, mse average: 0.017552
coeffs: 1, ratio: 9, mse average: 0.017552
coeffs: 1, ratio: 10, mse average: 0.017552
coeffs: 1, ratio: 11, mse average: 0.017552
coeffs: 1, ratio: 12, mse average: 0.017552
coeffs: 1, ratio: 13, mse average: 0.017552
coeffs: 1, ratio: 15, mse average: 0.017552
coeffs: 1, ratio: 18, mse average: 0.017552
coeffs: 1, ratio: 21, mse average: 0.017552
coeffs: 1, ratio: 30, mse average: 0.017552
coeffs: 1, ratio: 41, mse average: 0.017552


In [49]:
pca_dataset = dred_val_fc.dred_data.cpu().numpy()
pca_dataset = pca_dataset.reshape(-1, 60)
total_losses = {c: [] for c in components_dred}
no_days = 60

for c in components_dred:
    print(c)
    for record_no in range((pca_dataset.shape[0] // (no_days))):

        current_record = pca_dataset[(record_no * no_days):((record_no + 1) * no_days), :]
        dictionarylearner = DictionaryLearning(c, transform_algorithm = 'omp', 
                                       max_iter = 60, transform_n_nonzero_coefs=1)
        compressed_ksvd = dictionarylearner.fit_transform(current_record)
        
        # uncompress
        uncompressed_ksvd = compressed_ksvd @ dictionarylearner.components_
        
        # work out diff
        total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_ksvd)).item())
        
for c, ratio in zip(components_dred, required_ratios):
    
    mse_avg_ = sum(total_losses[c]) / len(total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))
        

29


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


19


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


14


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


9


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


7


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


6


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


5


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


5


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


4


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


4


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


3


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


3


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


2


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


1


  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,
  copy_Xy=copy_cov,


coeffs: 29, ratio: 2, mse average: 0.000055
coeffs: 19, ratio: 3, mse average: 0.000133
coeffs: 14, ratio: 4, mse average: 0.000121
coeffs: 9, ratio: 6, mse average: 0.000186
coeffs: 7, ratio: 7, mse average: 0.000281
coeffs: 6, ratio: 8, mse average: 0.000298
coeffs: 5, ratio: 9, mse average: 0.000398
coeffs: 5, ratio: 10, mse average: 0.000398
coeffs: 4, ratio: 11, mse average: 0.000578
coeffs: 4, ratio: 12, mse average: 0.000578
coeffs: 3, ratio: 13, mse average: 0.001015
coeffs: 3, ratio: 15, mse average: 0.001015
coeffs: 2, ratio: 18, mse average: 0.002004
coeffs: 1, ratio: 21, mse average: 0.004129
coeffs: 1, ratio: 30, mse average: 0.004129
coeffs: 1, ratio: 41, mse average: 0.004129


### DWT (1D)

1D Version. Refer paper link: https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=8275177. We use the "best" from this paper.

In [9]:
my_Wavelet = 'db2'
n = 3

In [10]:
cer_dwt1d_coefficients_to_keep = [int(12*48 / k) for k in required_ratios]
umass_dwt1d_coefficients_to_keep = [int(12*96 / k) for k in required_ratios]
dred_dwt1d_coefficients_to_keep = [int(60*60 / k) for k in required_ratios]

In [30]:
fft_dataset = cer_val_fc.cern_data.cpu().numpy()

dwt1d_total_losses = {c: [] for c in cer_dwt1d_coefficients_to_keep}

for c in cer_dwt1d_coefficients_to_keep:
    for record_no in range(len(fft_dataset)):
        # run dwt
        current_record = fft_dataset[record_no]
        coeffs = pywt.wavedec(current_record, my_Wavelet, level = n)
        
        # sort and remove smallest magnitude
        coeff_arr, coeff_slices = pywt.coeffs_to_array(coeffs)
        Csort = np.sort(np.abs(coeff_arr.reshape(-1)))[::-1]
        thresh = Csort[c]
        ind = np.abs(coeff_arr) > thresh
        Cfilt = coeff_arr * ind
        coeffs_filt = pywt.array_to_coeffs(Cfilt, coeff_slices, output_format='wavedec')
        
        # uncompress
        uncompressed_dwt1d = np.clip(pywt.waverec(coeffs_filt, my_Wavelet), 0, 1)
        
        # work out diff
        dwt1d_total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_dwt1d)).item())
        
for c, ratio in zip(cer_dwt1d_coefficients_to_keep, required_ratios):
    
    mse_avg_ = sum(dwt1d_total_losses[c]) / len(dwt1d_total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))

coeffs: 288, ratio: 2, mse average: 0.000180
coeffs: 192, ratio: 3, mse average: 0.000731
coeffs: 144, ratio: 4, mse average: 0.001460
coeffs: 96, ratio: 6, mse average: 0.003068
coeffs: 82, ratio: 7, mse average: 0.003887
coeffs: 72, ratio: 8, mse average: 0.004645
coeffs: 64, ratio: 9, mse average: 0.005410
coeffs: 57, ratio: 10, mse average: 0.006218
coeffs: 52, ratio: 11, mse average: 0.006912
coeffs: 48, ratio: 12, mse average: 0.007557
coeffs: 44, ratio: 13, mse average: 0.008295
coeffs: 38, ratio: 15, mse average: 0.009626
coeffs: 32, ratio: 18, mse average: 0.011301
coeffs: 27, ratio: 21, mse average: 0.013097
coeffs: 19, ratio: 30, mse average: 0.017136
coeffs: 14, ratio: 41, mse average: 0.021124


In [31]:
fft_dataset = umass_val_fc.umass_data.cpu().numpy()

dwt1d_total_losses = {c: [] for c in umass_dwt1d_coefficients_to_keep}

for c in umass_dwt1d_coefficients_to_keep:
    for record_no in range(len(fft_dataset)):
        # run dwt
        current_record = fft_dataset[record_no]
        coeffs = pywt.wavedec(current_record, my_Wavelet, level = n)
        
        # sort and remove smallest magnitude
        coeff_arr, coeff_slices = pywt.coeffs_to_array(coeffs)
        Csort = np.sort(np.abs(coeff_arr.reshape(-1)))[::-1]
        thresh = Csort[c]
        ind = np.abs(coeff_arr) > thresh
        Cfilt = coeff_arr * ind
        coeffs_filt = pywt.array_to_coeffs(Cfilt, coeff_slices, output_format='wavedec')
        
        # uncompress
        uncompressed_dwt1d = np.clip(pywt.waverec(coeffs_filt, my_Wavelet), 0, 1)
        
        # work out diff
        dwt1d_total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_dwt1d)).item())
        
for c, ratio in zip(umass_dwt1d_coefficients_to_keep, required_ratios):
    
    mse_avg_ = sum(dwt1d_total_losses[c]) / len(dwt1d_total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))

coeffs: 576, ratio: 2, mse average: 0.000608
coeffs: 384, ratio: 3, mse average: 0.001958
coeffs: 288, ratio: 4, mse average: 0.003397
coeffs: 192, ratio: 6, mse average: 0.006015
coeffs: 164, ratio: 7, mse average: 0.007203
coeffs: 144, ratio: 8, mse average: 0.008275
coeffs: 128, ratio: 9, mse average: 0.009384
coeffs: 115, ratio: 10, mse average: 0.010450
coeffs: 104, ratio: 11, mse average: 0.011526
coeffs: 96, ratio: 12, mse average: 0.012406
coeffs: 88, ratio: 13, mse average: 0.013418
coeffs: 76, ratio: 15, mse average: 0.015131
coeffs: 64, ratio: 18, mse average: 0.017281
coeffs: 54, ratio: 21, mse average: 0.019375
coeffs: 38, ratio: 30, mse average: 0.023442
coeffs: 28, ratio: 41, mse average: 0.026796


In [14]:
fft_dataset = dred_val_fc.dred_data.cpu().numpy()

dwt1d_total_losses = {c: [] for c in dred_dwt1d_coefficients_to_keep}

for c in dred_dwt1d_coefficients_to_keep:
    for record_no in range(len(fft_dataset)):
        # run dwt
        current_record = fft_dataset[record_no]
        coeffs = pywt.wavedec(current_record, my_Wavelet, level = n)
        
        # sort and remove smallest magnitude
        coeff_arr, coeff_slices = pywt.coeffs_to_array(coeffs)
        Csort = np.sort(np.abs(coeff_arr.reshape(-1)))[::-1]
        thresh = Csort[c]
        ind = np.abs(coeff_arr) > thresh
        Cfilt = coeff_arr * ind
        coeffs_filt = pywt.array_to_coeffs(Cfilt, coeff_slices, output_format='wavedec')
        
        # uncompress
        uncompressed_dwt1d = np.clip(pywt.waverec(coeffs_filt, my_Wavelet), 0, 1)
        
        # work out diff
        dwt1d_total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_dwt1d)).item())
        
for c, ratio in zip(dred_dwt1d_coefficients_to_keep, required_ratios):
    
    mse_avg_ = sum(dwt1d_total_losses[c]) / len(dwt1d_total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.8f}'.format(c, ratio, mse_avg_))

coeffs: 1800, ratio: 2, mse average: 0.00000000
coeffs: 1200, ratio: 3, mse average: 0.00000000
coeffs: 900, ratio: 4, mse average: 0.00000001
coeffs: 600, ratio: 6, mse average: 0.00000016
coeffs: 514, ratio: 7, mse average: 0.00000045
coeffs: 450, ratio: 8, mse average: 0.00000199
coeffs: 400, ratio: 9, mse average: 0.00000722
coeffs: 360, ratio: 10, mse average: 0.00001323
coeffs: 327, ratio: 11, mse average: 0.00001657
coeffs: 300, ratio: 12, mse average: 0.00005944
coeffs: 276, ratio: 13, mse average: 0.00024473
coeffs: 240, ratio: 15, mse average: 0.00139100
coeffs: 200, ratio: 18, mse average: 0.00446969
coeffs: 171, ratio: 21, mse average: 0.00872499
coeffs: 120, ratio: 30, mse average: 0.02754007
coeffs: 87, ratio: 41, mse average: 0.05072147


#### 2D Version

In [38]:
cer_dwt2d_coefficients_to_keep = [int(12*48 / k) for k in required_ratios]
umass_dwt2d_coefficients_to_keep = [int(12*96 / k) for k in required_ratios]
dred_dwt2d_coefficients_to_keep = [int(60*60 / k) for k in required_ratios]

In [33]:
pca_dataset = cer_val_fc.cern_data.cpu().numpy()
pca_dataset = pca_dataset.reshape(-1, 48)
dwt2d_total_losses = {c: [] for c in cer_dwt2d_coefficients_to_keep}
no_days = 12

for c in cer_dwt2d_coefficients_to_keep:
    for record_no in range((pca_dataset.shape[0] // (no_days))):
        # run dwt
        current_record = pca_dataset[(record_no * no_days):((record_no + 1) * no_days), :]
        coeffs = pywt.wavedec2(current_record, my_Wavelet, level = n)
        
        # sort and remove smallest magnitude
        coeff_arr, coeff_slices = pywt.coeffs_to_array(coeffs)
        Csort = np.sort(np.abs(coeff_arr.reshape(-1)))[::-1]
        thresh = Csort[c]
        ind = np.abs(coeff_arr) > thresh
        Cfilt = coeff_arr * ind
        coeffs_filt = pywt.array_to_coeffs(Cfilt, coeff_slices, output_format='wavedec2')
        
        # uncompress
        uncompressed_dwt2d = np.clip(pywt.waverec2(coeffs_filt, my_Wavelet), 0, 1)
        
        # work out diff
        dwt2d_total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_dwt2d)).item())
        
for c, ratio in zip(cer_dwt2d_coefficients_to_keep, required_ratios):
    
    mse_avg_ = sum(dwt2d_total_losses[c]) / len(dwt2d_total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))
        

  "boundary effects.").format(level))


coeffs: 288, ratio: 2, mse average: 0.000682
coeffs: 192, ratio: 3, mse average: 0.001725
coeffs: 144, ratio: 4, mse average: 0.002818
coeffs: 96, ratio: 6, mse average: 0.004825
coeffs: 82, ratio: 7, mse average: 0.005731
coeffs: 72, ratio: 8, mse average: 0.006510
coeffs: 64, ratio: 9, mse average: 0.007274
coeffs: 57, ratio: 10, mse average: 0.008040
coeffs: 52, ratio: 11, mse average: 0.008644
coeffs: 48, ratio: 12, mse average: 0.009189
coeffs: 44, ratio: 13, mse average: 0.009789
coeffs: 38, ratio: 15, mse average: 0.010852
coeffs: 32, ratio: 18, mse average: 0.012077
coeffs: 27, ratio: 21, mse average: 0.013286
coeffs: 19, ratio: 30, mse average: 0.015881
coeffs: 14, ratio: 41, mse average: 0.018231


In [34]:
pca_dataset = umass_val_fc.umass_data.cpu().numpy()
pca_dataset = pca_dataset.reshape(-1, 96)
dwt2d_total_losses = {c: [] for c in umass_dwt2d_coefficients_to_keep}
no_days = 12

for c in umass_dwt2d_coefficients_to_keep:
    for record_no in range((pca_dataset.shape[0] // (no_days))):
        # run dwt
        current_record = pca_dataset[(record_no * no_days):((record_no + 1) * no_days), :]
        coeffs = pywt.wavedec2(current_record, my_Wavelet, level = n)
        
        # sort and remove smallest magnitude
        coeff_arr, coeff_slices = pywt.coeffs_to_array(coeffs)
        Csort = np.sort(np.abs(coeff_arr.reshape(-1)))[::-1]
        thresh = Csort[c]
        ind = np.abs(coeff_arr) > thresh
        Cfilt = coeff_arr * ind
        coeffs_filt = pywt.array_to_coeffs(Cfilt, coeff_slices, output_format='wavedec2')
        
        # uncompress
        uncompressed_dwt2d = np.clip(pywt.waverec2(coeffs_filt, my_Wavelet), 0, 1)
        
        # work out diff
        dwt2d_total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_dwt2d)).item())
        
for c, ratio in zip(umass_dwt2d_coefficients_to_keep, required_ratios):
    
    mse_avg_ = sum(dwt2d_total_losses[c]) / len(dwt2d_total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))
        

coeffs: 576, ratio: 2, mse average: 0.001704
coeffs: 384, ratio: 3, mse average: 0.003561
coeffs: 288, ratio: 4, mse average: 0.005226
coeffs: 192, ratio: 6, mse average: 0.007768
coeffs: 164, ratio: 7, mse average: 0.008847
coeffs: 144, ratio: 8, mse average: 0.009680
coeffs: 128, ratio: 9, mse average: 0.010484
coeffs: 115, ratio: 10, mse average: 0.011234
coeffs: 104, ratio: 11, mse average: 0.011928
coeffs: 96, ratio: 12, mse average: 0.012464
coeffs: 88, ratio: 13, mse average: 0.013048
coeffs: 76, ratio: 15, mse average: 0.014041
coeffs: 64, ratio: 18, mse average: 0.015289
coeffs: 54, ratio: 21, mse average: 0.016509
coeffs: 38, ratio: 30, mse average: 0.019631
coeffs: 28, ratio: 41, mse average: 0.023239


In [39]:
pca_dataset = dred_val_fc.dred_data.cpu().numpy()
pca_dataset = pca_dataset.reshape(-1, 96)
dwt2d_total_losses = {c: [] for c in dred_dwt2d_coefficients_to_keep}
no_days = 60

for c in dred_dwt2d_coefficients_to_keep:
    for record_no in range((pca_dataset.shape[0] // (no_days))):
        # run dwt
        current_record = pca_dataset[(record_no * no_days):((record_no + 1) * no_days), :]
        coeffs = pywt.wavedec2(current_record, my_Wavelet, level = n)
        
        # sort and remove smallest magnitude
        coeff_arr, coeff_slices = pywt.coeffs_to_array(coeffs)
        Csort = np.sort(np.abs(coeff_arr.reshape(-1)))[::-1]
        thresh = Csort[c]
        ind = np.abs(coeff_arr) > thresh
        Cfilt = coeff_arr * ind
        coeffs_filt = pywt.array_to_coeffs(Cfilt, coeff_slices, output_format='wavedec2')
        
        # uncompress
        uncompressed_dwt2d = np.clip(pywt.waverec2(coeffs_filt, my_Wavelet), 0, 1)
        
        # work out diff
        dwt2d_total_losses[c].append(loss(torch.from_numpy(current_record), 
                                    torch.from_numpy(uncompressed_dwt2d)).item())
        
for c, ratio in zip(dred_dwt2d_coefficients_to_keep, required_ratios):
    
    mse_avg_ = sum(dwt2d_total_losses[c]) / len(dwt2d_total_losses[c])
    
    print('coeffs: {}, ratio: {}, mse average: {:.6f}'.format(c, ratio, mse_avg_))
        

coeffs: 1800, ratio: 2, mse average: 0.000002
coeffs: 1200, ratio: 3, mse average: 0.000088
coeffs: 900, ratio: 4, mse average: 0.000385
coeffs: 600, ratio: 6, mse average: 0.001394
coeffs: 514, ratio: 7, mse average: 0.002045
coeffs: 450, ratio: 8, mse average: 0.002785
coeffs: 400, ratio: 9, mse average: 0.003569
coeffs: 360, ratio: 10, mse average: 0.004372
coeffs: 327, ratio: 11, mse average: 0.005167
coeffs: 300, ratio: 12, mse average: 0.005949
coeffs: 276, ratio: 13, mse average: 0.006740
coeffs: 240, ratio: 15, mse average: 0.008106
coeffs: 200, ratio: 18, mse average: 0.010002
coeffs: 171, ratio: 21, mse average: 0.011762
coeffs: 120, ratio: 30, mse average: 0.016171
coeffs: 87, ratio: 41, mse average: 0.020801
