In [None]:
from src.ConvAE_models import *
import pandas as pd
import time
from sklearn.decomposition import PCA

In [None]:
# Plot configuration: number of sample images rendered per split.
nimgs_train = 20
nimgs_test = 20

# Seed NumPy's global RNG so the same sample indices are drawn on every run.
np.random.seed(42)

# Draw the train indices first, then the test indices (the order fixes the
# random stream). Upper bounds are exclusive; presumably they are the number
# of samples in each split -- verify against the loaded data.
inds_train = np.random.randint(0, 2919, size=nimgs_train)
inds_test = np.random.randint(0, 364, size=nimgs_test)

In [None]:
# loop over variables and encoding dimensions
#
# For every variable and every encoding dimension: fit a PCA on the training
# split, encode all three splits, save the stacked encodings, reconstruct the
# fields, compute the reconstruction MSE and write sample plots to disk.

var_list = ["t2m", "500gh", "850u", "850v"]
enc_dim_list = [2,4,8,12,16]

# Side length of the square input fields; the arrays are indexed below as
# [sample, row, col, 0].
img_side = 81


def _save_image(plot_fname, img, **imshow_kwargs):
    """Render one 2-D field without axes and save it to ``plot_fname``.

    Extra keyword arguments (``vmin``, ``vmax``, ``cmap``, ...) are passed
    straight to ``plt.imshow``.
    """
    fig = plt.figure(figsize=(4, 4))
    plt.imshow(img, **imshow_kwargs)
    plt.axis('off')
    plt.savefig(plot_fname, bbox_inches='tight')
    # Close explicitly: many figures are created inside the loops.
    plt.close(fig)


def _plot_samples(plot_base_fname, inds, data, decoded_imgs):
    """Save original, reconstruction and difference plots for selected samples.

    Parameters
    ----------
    plot_base_fname : str
        Path prefix; ``<index>_orig.pdf``, ``<index>_reconstr.pdf`` and
        ``<index>_diff.pdf`` are appended to it.
    inds : array of int
        Sample indices to plot (repeated indices overwrite the same files).
    data : array indexed as ``[sample, row, col, 0]``
        Input fields.
    decoded_imgs : array indexed as ``[sample, row, col]``
        Reconstructed fields.
    """
    for ind in inds:
        sample_prefix = plot_base_fname + str(ind) + '_'
        original = data[ind,:,:,0]
        reconstructed = decoded_imgs[ind,:,:]

        # original (= input) image; the fixed 0..1 colour range presumably
        # matches the normalisation done by the loader -- TODO confirm
        _save_image(sample_prefix + 'orig' + '.pdf', original, vmin = 0, vmax = 1)

        # reconstructed (= output) image
        _save_image(sample_prefix + 'reconstr' + '.pdf', reconstructed, vmin = 0, vmax = 1)

        # difference, on a diverging colour map centred on zero
        _save_image(sample_prefix + 'diff' + '.pdf', original - reconstructed,
                    cmap = 'RdBu', vmin = -1, vmax = 1)


# One record per (variable, encoding dimension), so the timings and losses
# are kept instead of being overwritten on every iteration.
pca_metrics = []

for this_var in var_list:

    # load data once per variable; it does not depend on the encoding dimension
    x_train, x_valid, x_test = load_and_normalize_data(data_dir='.../data/',
                                                       var=this_var)

    # Flatten each field to one row per sample for PCA. With a trailing axis
    # of length 1 this is element-for-element the same (n_samples, 81*81)
    # matrix the moveaxis/reshape/moveaxis sequence produced.
    x_train_reshaped = x_train.reshape(x_train.shape[0], img_side * img_side)
    x_valid_reshaped = x_valid.reshape(x_valid.shape[0], img_side * img_side)
    x_test_reshaped = x_test.reshape(x_test.shape[0], img_side * img_side)

    for this_enc_dim in enc_dim_list:

        # time fitting, encoding, saving and reconstruction (not data loading)
        start_time = time.time()

        # build + fit PCA model (training data only)
        pca = PCA(this_enc_dim)
        pca.fit(x_train_reshaped)

        # transform training, validation and test data with PCA model
        x_train_PCA_enc = pca.transform(x_train_reshaped)
        x_valid_PCA_enc = pca.transform(x_valid_reshaped)
        x_test_PCA_enc = pca.transform(x_test_reshaped)

        # save the encoding: rows are stacked in the order train, valid, test
        filename_preds = '.../AE_results/predictions/PCA_' + this_var + '_' + str(this_enc_dim) + '.npy'
        preds = np.concatenate([x_train_PCA_enc, x_valid_PCA_enc, x_test_PCA_enc], axis = 0)
        np.save(arr=preds, file=filename_preds)

        # compute reconstructions
        x_train_PCA_reconstr = pca.inverse_transform(x_train_PCA_enc)
        x_valid_PCA_reconstr = pca.inverse_transform(x_valid_PCA_enc)
        x_test_PCA_reconstr = pca.inverse_transform(x_test_PCA_enc)

        # reshape reconstructions to match original dimensions
        x_train_PCA_reconstr = x_train_PCA_reconstr.reshape(x_train.shape[0], img_side, img_side)
        x_valid_PCA_reconstr = x_valid_PCA_reconstr.reshape(x_valid.shape[0], img_side, img_side)
        x_test_PCA_reconstr = x_test_PCA_reconstr.reshape(x_test.shape[0], img_side, img_side)

        elapsed_time = time.time() - start_time

        # compute losses (mean squared reconstruction error per split)
        mse_train = np.square(x_train_PCA_reconstr - x_train[:,:,:,0]).mean()
        mse_valid = np.square(x_valid_PCA_reconstr - x_valid[:,:,:,0]).mean()
        mse_test = np.square(x_test_PCA_reconstr - x_test[:,:,:,0]).mean()

        pca_metrics.append({
            'var': this_var,
            'enc_dim': this_enc_dim,
            'elapsed_time': elapsed_time,
            'mse_train': float(mse_train),
            'mse_valid': float(mse_valid),
            'mse_test': float(mse_test),
        })

        # create plots

        # train
        _plot_samples('.../AE_results/single_plots/PCA/' + 'PCA_train_' + this_var + '_' + str(this_enc_dim) + '_',
                      inds_train, x_train, x_train_PCA_reconstr)

        # test
        _plot_samples('.../AE_results/single_plots/PCA/' + 'PCA_test_' + this_var + '_' + str(this_enc_dim) + '_',
                      inds_test, x_test, x_test_PCA_reconstr)

# summary table of timings and losses; displayed as the cell output
pca_metrics_df = pd.DataFrame(pca_metrics)
pca_metrics_df

