In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.mixture import GaussianMixture
import joblib
from tqdm import tqdm
import pandas as pd
import argparse
import os
import pandas 

In [2]:

def weighted_l2(v, mat):
    if np.linalg.matrix_rank(mat) < mat.shape[0]:
        m = v.T @ np.linalg.pinv(mat) @ v
    else:
        # print("hi ", np.linalg.det(mat), np.linalg.matrix_rank(mat))
        m = v.T @ np.linalg.inv(mat) @ v
    return m

def decode(model, A, y, sigma = 1e-7):
    x_hat = np.empty(model.means_.shape)
    cost = []
    var_noise = sigma * np.eye(A.shape[0])

    for j in range(model.means_.shape[0]):
        var_j, mu_j = model.covariances_[j], model.means_[j]
        x_hat_j = var_j @ A.T @ np.linalg.inv(A @ var_j @ A.T + var_noise) @ (y - A @ mu_j) + mu_j
        # print(np.linalg.det(var_j))
        # print(var_j)
        try:
            cost_j = weighted_l2(y - A @ x_hat_j, var_noise) + weighted_l2(x_hat_j - mu_j, var_j) + np.log(np.linalg.det(var_j))
        except:
            print("y - A @ x_hat_j",y - A @ x_hat_j , "\n", var_noise,"\n",y,A)
        # print(np.linalg.det(var_j))
        x_hat[j] = x_hat_j
        cost.append(cost_j)

    j = np.argmin(cost)
    # print(j)
    return x_hat[j]

def PSNR(x_, x_t):
    assert x_.shape == x_t.shape
    peak = np.max(x_) - np.min(x_)
    mse = np.mean(((x_ - x_t)/peak)**2, axis=1)
    print(type(mse),mse)
    psnr = -10*np.log10(mse)
    return np.mean(psnr), np.min(psnr), np.max(psnr), np.std(psnr)

def plot_data(xs, ys, path = 'PSNR.png', ylabel='Avg PSNR'):
    plt.plot(xs, ys, marker="o")
    plt.xlabel('# of measurements')
    plt.ylabel(ylabel)
    plt.grid(True)
    for x,y in zip(xs,ys):
        label = f"({x},{round(y,2)})"
        plt.annotate(label, (x,y), textcoords="offset points", xytext=(-5,10), ha='center')
#     plt.tight_layout()
    plt.savefig(path)
    plt.close()

In [3]:
full_data = []
time_window = 30*2
for experiment_number  in [1,2]:
    
    for USV_number in [2,3,4,5]:
        path = "CNFUV_Datasets/Datasets/Data_Experiment_%d/pi%d.xlsx"% (experiment_number, USV_number)
        dataframe = pandas.read_excel(path)

        dataframe = dataframe.mask(dataframe.eq(" None")).dropna()
        dataframe = dataframe.mask(dataframe.eq("None")).dropna()
        dataframe = dataframe.sort_values(by=['time'])
        dataframe = dataframe[['Humidity','Temperature']]
        
        dataframe = dataframe[:(dataframe.shape[0]//time_window)*time_window]
        data = dataframe.to_numpy().flatten()
        data = data.reshape(-1,time_window)
        print(data.shape)
        full_data.extend(data)
full_data = np.array(full_data)


(50, 60)
(28, 60)
(58, 60)
(68, 60)
(18, 60)
(26, 60)
(34, 60)
(46, 60)


In [4]:
# print(full_data.shape)

print(full_data.shape)
print(len(full_data),len(full_data[0]), type(full_data),type(full_data[0]))

(328, 60)
328 60 <class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [5]:
n_train = full_data.shape[0]//10 *9
n_test = full_data.shape[0] - n_train
n_component_overall = 5
cnt = 5
n_init = 5
patch_size = time_window
to_train = 1
use_mat = 0

In [6]:
np.random.shuffle(data)
train_data = full_data[:n_train]
test_data = full_data[n_train:]

In [7]:
print(len(train_data))

print(len(test_data))

288
40


In [8]:
folder_name = 'GNFUV_results/latest/'
if to_train:
#     model = GaussianMixture(n_components=n_components, n_init=n_init, verbose=1, max_iter=200, init_params='random')
#     model.fit(train_data)

    lowest_bic = np.infty
    bic = []
    n_components_range = range(1, 10)
#     cv_types = ["spherical", "tied", "diag", "full"]
#     for cv_type in cv_types:
    for n_components in n_components_range:
        # Fit a Gaussian mixture with EM
        gmm = GaussianMixture(
            n_components=n_components, n_init=n_init,  max_iter=200,
        )
        gmm.fit(train_data)
        bic.append(gmm.bic(train_data))
        if bic[-1] < lowest_bic:
            lowest_bic = bic[-1]
            best_gmm = gmm
            n_component_overall = n_components
    model = best_gmm
    print("n_component_overall: ",n_component_overall)
    if os.path.isdir(folder_name):
        for f in os.listdir(folder_name):
            if not os.path.isdir(folder_name + f):
                os.remove(folder_name + f)
    else:
        os.makedirs(folder_name)

    # Save the model as a pickle in a file
    joblib.dump(model, folder_name + 'model.pkl')
else:
    
    # Load the model from the file
    model = joblib.load(folder_name + 'model.pkl')

n_component_overall:  8


In [9]:
for var in model.covariances_:
    print(var)
    print(np.linalg.det(var), np.linalg.matrix_rank(var))
    # print(np.linalg.det(np.linalg.inv(var)))
for var in model.weights_:
    print(var)

[[ 7.10467228 -0.61764706  6.41695502 ...  2.36851211  1.10986159
   2.51557093]
 [-0.61764706  6.05882453 -0.55882353 ...  4.41176471  1.38235294
   4.44117647]
 [ 6.41695502 -0.55882353  5.92733664 ...  2.31141869  0.93252595
   2.45847751]
 ...
 [ 2.36851211  4.41176471  2.31141869 ...  6.81661    -1.84948097
   6.93425606]
 [ 1.10986159  1.38235294  0.93252595 ... -1.84948097  7.51297678
  -1.84948097]
 [ 2.51557093  4.44117647  2.45847751 ...  6.93425606 -1.84948097
   7.11072764]]
3.722618747831842e-183 60
[[ 7.37456697  0.95225694  7.07118056 ...  2.05685764  4.82378472
   2.05685764]
 [ 0.95225694  4.53993156  1.05902778 ...  4.79600694  0.40798611
   4.79600694]
 [ 7.07118056  1.05902778  7.36805656 ...  1.84201389  5.10763889
   1.84201389]
 ...
 [ 2.05685764  4.79600694  1.84201389 ...  6.13498364 -0.01996528
   6.13498264]
 [ 4.82378472  0.40798611  5.10763889 ... -0.01996528  5.74826489
  -0.01996528]
 [ 2.05685764  4.79600694  1.84201389 ...  6.13498264 -0.01996528
   6.1

In [10]:
d = patch_size
A_ = np.random.binomial(1, 0.5, size=(d, d))

ms = [d*(i+1)//10 for i in range(10)]
print(ms)
psnr, psnr_min, psnr_max, psnr_std = [], [], [], []
val_err, err_std = [], []

for m in tqdm(ms):
    if use_mat:
        A = np.load(folder_name + 'mat/%d.npy' % m)
    else:
        A = A_[:m, :]
    mm = m-min(m//2,5)
    print(m,mm)
    reconstruction = np.empty(test_data.shape)
    patch_err = []
    cnt1 = cnt
    for j in range(len(test_data)):
        x = test_data[j]
        x_hat = np.zeros(x.shape)
        for i in range(x.shape[0]//d):
            y = A @ x[i*d:(i+1)*d]
            x_hat[i*d:(i+1)*d] = decode(model, A[:mm], y[:mm])
            patch_err.append(100*np.mean(np.square(y[mm:] - A[mm:] @ x_hat[i*d:(i+1)*d])))
        # cs example
        if cnt1 > 0:
            plt.plot(x, label='Original', color='C0')
            plt.plot(x_hat, label='Reconstructed', color='C1')
            # plt.plot(x[:20], label='Original', color='C0')
            # plt.plot(x[20:40], label='Original', color='C0')
            # plt.plot(x[40:], label='Original', color='C0')
            # plt.plot(x_hat[:20], label='Reconstructed', color='C1')
            # plt.plot(x_hat[20:40], label='Reconstructed', color='C1')
            # plt.plot(x_hat[40:], label='Reconstructed', color='C1')
            plt.legend()
            plt.savefig(folder_name + 'cs_%d_%d.png' % (cnt1, mm))
            plt.close()
            cnt1 -= 1
        reconstruction[j] = x_hat
    val, min_val, max_val, std_val = PSNR(test_data.astype(float), reconstruction.astype(float))
    psnr.append(val)
    psnr_min.append(min_val)
    psnr_max.append(max_val)
    psnr_std.append(std_val)
    val_err.append(np.mean(patch_err))
    err_std.append(np.std(patch_err))



[6, 12, 18, 24, 30, 36, 42, 48, 54, 60]


  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

6 3


 10%|████████▎                                                                          | 1/10 [00:05<00:52,  5.87s/it]

<class 'numpy.ndarray'> [0.05943633 0.06164759 0.06145993 0.0545819  0.06411843 0.06368714
 0.06421423 0.06953521 0.06976634 0.07180853 0.0650719  0.06976634
 0.07500517 0.06938706 0.06949403 0.06899333 0.06578688 0.06909357
 0.06769975 0.05771983 0.07144044 0.07841374 0.06897233 0.07151395
 0.06678969 0.07643636 0.08205911 0.07898442 0.07830492 0.0758238
 0.0775187  0.0775187  0.07926076 0.0985144  0.08670807 0.08938718
 0.09390713 0.10904565 0.11143253 0.11629982]
12 7


 20%|████████████████▌                                                                  | 2/10 [00:11<00:45,  5.70s/it]

<class 'numpy.ndarray'> [0.0072456  0.00649493 0.00499965 0.00488961 0.00340967 0.00605092
 0.00646184 0.00692706 0.00601837 0.00626218 0.00667644 0.00601837
 0.01090301 0.00566257 0.00628356 0.00757771 0.00464989 0.00519419
 0.00449126 0.00535256 0.00549347 0.00384845 0.008929   0.00635521
 0.01330811 0.00602786 0.00720152 0.00729779 0.00678298 0.00932398
 0.00605695 0.00605695 0.00611448 0.00049079 0.00031859 0.00041977
 0.0009536  0.00029714 0.00098733 0.00028654]
18 13


 30%|████████████████████████▉                                                          | 3/10 [00:17<00:39,  5.67s/it]

<class 'numpy.ndarray'> [2.83061127e-04 1.91684309e-03 1.90278458e-03 3.18676787e-04
 3.08018127e-04 4.12230500e-04 1.35303420e-04 2.59972689e-03
 2.00459182e-03 2.32004100e-03 4.69210011e-05 2.00459182e-03
 1.97689371e-04 2.05580034e-03 2.23895075e-03 1.59756587e-03
 4.10302037e-05 1.93084044e-05 1.07220155e-04 1.87941720e-04
 2.29791614e-03 2.37647647e-04 2.61572344e-04 1.82787272e-03
 4.69492987e-04 3.42046825e-04 1.95551849e-04 2.45937570e-03
 2.22536429e-03 2.03174323e-03 2.84040120e-05 2.84040120e-05
 1.37840875e-04 7.43316727e-04 2.26155959e-04 4.70352327e-04
 3.02607160e-04 3.50616031e-04 4.21254941e-04 4.72538236e-04]
24 19


 40%|█████████████████████████████████▏                                                 | 4/10 [00:23<00:35,  5.84s/it]

<class 'numpy.ndarray'> [5.69150956e-04 1.63671208e-04 7.70513406e-05 4.02714507e-04
 2.97036691e-04 4.81396039e-04 1.65674823e-04 1.18906077e-04
 7.41276424e-05 1.07055408e-04 7.93306074e-05 7.41276424e-05
 1.31725752e-04 5.31379097e-05 7.36781998e-05 2.34154140e-04
 4.40353063e-05 2.38221992e-05 1.73819097e-04 4.17155461e-04
 1.73139031e-04 2.87859547e-04 2.71525576e-04 1.56443738e-04
 4.05947474e-04 6.41885944e-04 3.63944358e-04 1.28776149e-04
 5.39664670e-05 1.65750479e-04 2.67251384e-05 2.67251384e-05
 2.17885299e-04 4.55073308e-04 2.00715936e-04 3.78461431e-04
 1.48985637e-04 2.64077590e-04 2.53010836e-04 1.81495279e-04]
30 25


 50%|█████████████████████████████████████████▌                                         | 5/10 [00:29<00:29,  5.94s/it]

<class 'numpy.ndarray'> [6.05095916e-04 1.79116767e-04 4.35318518e-05 4.96462861e-04
 4.24794691e-04 4.02197907e-04 2.56094225e-04 3.23837071e-04
 6.16857815e-05 1.25646590e-04 1.95269295e-04 6.16857815e-05
 2.28437403e-04 1.54886515e-04 1.06061478e-04 2.64279784e-04
 6.87420337e-05 5.01601171e-05 1.64156563e-04 2.79497157e-04
 3.29007239e-04 3.32503696e-04 3.11823014e-04 6.64958815e-05
 6.53222201e-04 5.64512149e-04 5.60172838e-05 2.64078634e-04
 1.63322558e-04 2.53383996e-04 5.74140991e-05 5.74140991e-05
 2.13862820e-04 9.99690268e-04 2.69519975e-04 2.81158538e-04
 1.50051599e-04 3.58160473e-04 2.76349891e-04 2.10541759e-04]
36 31


 60%|█████████████████████████████████████████████████▊                                 | 6/10 [00:35<00:24,  6.15s/it]

<class 'numpy.ndarray'> [2.42982886e-04 3.06144030e-04 3.09147664e-05 2.00901101e-04
 4.28923143e-04 3.97785793e-04 1.93138641e-04 1.72251005e-04
 4.41074037e-05 1.23424225e-04 9.06210687e-05 4.41074037e-05
 2.25224990e-04 4.74018089e-05 1.41416116e-04 1.02350588e-04
 1.29694722e-04 6.01795850e-05 1.93998727e-04 3.61416851e-04
 2.24143875e-04 4.13440355e-04 1.71705854e-04 6.91022195e-05
 7.67202684e-04 5.99257886e-04 4.85259457e-05 6.92816399e-05
 6.31909351e-05 1.43410379e-04 5.20591393e-05 5.20591393e-05
 1.62329735e-04 4.84149780e-04 1.59961613e-04 1.63287194e-04
 6.36496768e-05 1.93527179e-04 2.00084068e-04 2.39084182e-04]
42 37


 70%|██████████████████████████████████████████████████████████                         | 7/10 [00:42<00:19,  6.39s/it]

<class 'numpy.ndarray'> [4.47276655e-04 1.30363462e-04 1.11429741e-05 2.26355404e-04
 4.53404397e-04 6.30083063e-04 2.75633413e-04 7.51040051e-05
 1.07387629e-05 6.19096157e-05 8.95352196e-05 1.07387629e-05
 2.09243293e-04 8.54843304e-06 1.14655017e-04 7.89411133e-05
 1.53075743e-04 8.34609868e-06 6.63666653e-05 1.83454622e-04
 3.18933030e-04 6.70175424e-04 9.34682021e-05 8.03547935e-05
 1.68267167e-04 5.54090655e-04 2.80535805e-04 1.72239461e-04
 7.96121204e-06 1.66390182e-04 7.04336714e-06 7.04336714e-06
 2.17635002e-04 5.18101343e-04 3.87656164e-04 2.05340414e-04
 1.04562222e-04 2.71216012e-04 3.94523098e-04 5.31559097e-04]
48 43


 80%|██████████████████████████████████████████████████████████████████▍                | 8/10 [00:49<00:13,  6.65s/it]

<class 'numpy.ndarray'> [1.51767400e-04 7.35399196e-05 7.19214074e-08 3.22852172e-04
 9.26175176e-04 4.99281417e-04 1.82553549e-04 7.39266540e-05
 7.06013891e-08 1.24986178e-04 8.37332776e-05 7.06013891e-08
 3.15548010e-04 1.25017129e-07 8.74190886e-05 1.35450377e-04
 6.01754491e-05 8.45596165e-07 1.13025486e-04 2.45380931e-04
 2.23588725e-04 4.32656572e-04 1.26961431e-04 1.49048922e-04
 1.63361132e-04 9.85492108e-04 3.34399066e-04 3.40003420e-04
 8.20115725e-08 1.73796048e-04 6.84264065e-07 6.84264065e-07
 2.75322402e-04 5.09005365e-04 9.39446270e-05 2.20690858e-04
 1.02715258e-04 2.66214079e-04 2.78149943e-04 7.25568340e-04]
54 49


 90%|██████████████████████████████████████████████████████████████████████████▋        | 9/10 [00:57<00:06,  6.87s/it]

<class 'numpy.ndarray'> [1.38696414e-04 1.05218462e-04 1.43702586e-10 3.84384589e-04
 6.78797676e-04 4.94146616e-03 4.44813653e-04 1.32325806e-04
 1.40530201e-10 6.45642924e-04 1.41890062e-04 1.40530201e-10
 5.97335362e-05 9.92729658e-11 4.93670796e-04 1.52379900e-05
 1.58149115e-05 4.52492344e-11 4.17017032e-04 5.39428063e-04
 5.39752101e-05 4.57230553e-04 1.49508961e-04 5.71865235e-04
 6.72437874e-04 5.61706270e-03 1.78223065e-03 1.36482350e-03
 9.62015379e-11 6.71120367e-04 4.18639138e-11 4.18639138e-11
 1.51152893e-03 5.54589767e-03 5.51839520e-04 5.93119277e-04
 2.57153952e-04 1.26594178e-03 7.05577327e-04 2.96448351e-04]
60 55


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [01:05<00:00,  6.53s/it]

<class 'numpy.ndarray'> [6.49631635e-05 2.73925537e-06 7.47972004e-12 2.74280643e-05
 8.25024444e-05 1.94712390e-04 1.42012323e-05 3.23346039e-05
 6.69689020e-12 2.39359954e-06 2.02484005e-05 6.69689020e-12
 1.33933276e-05 6.61576050e-12 8.28413076e-06 4.29213213e-07
 1.23155079e-06 6.88039648e-12 1.01520942e-05 2.13124416e-05
 2.08283233e-05 3.81216482e-05 9.88277676e-06 6.45759100e-06
 6.86876844e-05 4.95515877e-04 1.06324013e-06 7.00667962e-06
 5.86112790e-12 7.93156165e-06 6.07190058e-12 6.07190058e-12
 1.95343581e-05 1.17993159e-04 3.40502585e-05 1.92036401e-04
 5.49972860e-05 9.89312525e-05 1.81874216e-05 1.05657725e-04]





In [11]:
print(psnr)
# print(psnr_min)
# print(psnr_max)
print(psnr_std)
print(val_err)
print(err_std)
plot_data([x//5*4 for x in ms], psnr, path=folder_name + 'psnr.png')
plot_data([x//5*4 for x in ms], val_err, path=folder_name + 'val_err.png', ylabel='Validation error (1e-2)')

[11.306942021473528, 23.996076735150165, 33.64939892826728, 38.02634774153701, 37.09530272904833, 38.40331225500031, 39.61238071973425, 42.92506224347905, 47.26798296875737, 59.98264561238015]
[0.7437301876568868, 4.478661940325201, 6.081716566506871, 3.6932421705823804, 3.4400416436524432, 3.4857304255832013, 6.129178784359185, 12.687607472197145, 27.361254832849703, 26.66036756708394]
[15444.87333576431, 2293.1688578503076, 1090.6051679128673, 201.1983035343885, 333.4051567241114, 338.53878304297325, 198.78607996085793, 192.8423254075995, 913.5616358559506, 38.28439230108501]
[4729.975519380865, 1754.4410549731938, 1223.4457577742958, 182.2448327709469, 267.6442085595529, 377.15604255147394, 313.2828615127684, 233.44206033179012, 1814.9048709355286, 108.03003587336893]


In [12]:
for ii in range(len(test_data)):
    avg_cost = 0
    x = test_data[i]
    cost = []
    for j in range(model.means_.shape[0]):
        var_j, mu_j = model.covariances_[j], model.means_[j]
        cost_j = weighted_l2(x - mu_j, var_j) + np.log(np.linalg.det(var_j))
        cost.append(cost_j)
    k = np.argmin(cost)
    avg_cost += cost[k]
avg_cost /= len(test_data)
print(avg_cost)

13432.638597549409
