In [1]:
# Reduce TensorFlow log noise: level '2' hides info and warning messages.
# This is set here, ahead of the TensorFlow import in the next cell.
import os
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})

In [2]:
import numpy as np
import skimage.io
import matplotlib.pyplot as plt
import pathlib
import tqdm
import annotation, misc, hyspec_io, image_render
import skimage.exposure
import tensorflow as tf

In [3]:
# Run on CPU only: hide all GPUs from TensorFlow, then verify that no
# GPU is left among the visible devices
tf.config.set_visible_devices([], 'GPU')
visible_devices = tf.config.get_visible_devices()
assert all(dev.device_type != 'GPU' for dev in visible_devices)

In [4]:
# Paths (absolute locations on the machine this notebook was run on)
# PCA model archive (.npz), read with np.load in a later cell; it holds the
# 'spectra_mean', 'spectra_std' and 'pca_components' arrays.
# NOTE(review): the file name says "OlbergAreaA" while the folder is "OlbergAreaS" - confirm this is the intended file
pca_data_model_file = pathlib.Path('/media/mha114/Massimal/Larvik_Olberg/Hyperspectral/20210825/OlbergAreaS/M_PCA_Model/20210825_OlbergAreaA_PCA_data_and_model.npz')
# Input: saved dataset of (image tile, label tile) pairs, read with tf.data.experimental.load
tiles_dataset_path = pathlib.Path('/media/mha114/Massimal/Larvik_Olberg/Hyperspectral/20210825/OlbergAreaS/4c_Rad_Georef_SGC_Tiles/MergedTrainValDatasetNGT')
# Output: destination of the PCA-transformed dataset, written with tf.data.experimental.save
pca_tiles_dataset_path = pathlib.Path('/media/mha114/Massimal/Larvik_Olberg/Hyperspectral/20210825/OlbergAreaS/5c_Rad_Georef_SGC_PCA_Tiles/20210825_Olberg_PCA_TrainValDataset')

In [5]:
# Read the PCA model arrays (spectra mean, spectra standard deviation and
# PCA component matrix) from the .npz archive; the archive is closed again
# when the with-block exits
with np.load(pca_data_model_file) as pca_npz:
    X_mean, X_std, W_pca = (
        pca_npz[key] for key in ('spectra_mean', 'spectra_std', 'pca_components')
    )

In [6]:
def pca_transform_image(image,W_pca,X_mean,X_std=None):
    """ Apply PCA transform to image cube

    # Arguments:
    image       NumPy array with the channels along the last axis, typically
                3 dimensions (n_rows,n_cols,n_channels). Any number of leading
                dimensions is accepted, e.g. a stack of tiles with shape
                (n_tiles,n_rows,n_cols,n_channels).
    W_pca       PCA weight matrix with 2 dimensions (n_channels,n_components)
    X_mean      Mean value vector, to be subtracted from data ("centering")
                Length (n_channels,)

    # Keyword arguments:
    X_std       Standard deviation vector, to be used for scaling (z score)
                If None, no scaling is performed
                Length (n_channels,)

    # Returns:
    image_pca   NumPy array with the same leading dimensions as the input and
                n_components along the last axis, e.g. for a 3D input
                (n_rows,n_cols,n_components)

    # Notes:
    - Input pixels which are zero across all channels are set to zero in the
      output PCA image as well.
    - The zero pixels are set by assignment (not by multiplying with a mask),
      so they are exactly zero even if the normalization produced inf or NaN
      for them (e.g. because X_std contains a zero).

    """
    # Find pixels that are zero in every channel (one boolean per pixel)
    zero_pixel_mask = np.all(image==0,axis=-1)

    # Vectorize image: one row per pixel, one column per channel
    im_vec = np.reshape(image,(-1,image.shape[-1]))

    # Subtract mean (always) and scale (optional)
    im_vec_norm = im_vec-X_mean
    if X_std is not None:
        im_vec_norm = im_vec_norm/X_std

    # PCA transform through matrix multiplication (projection to rotated coordinate system)
    im_vec_pca = im_vec_norm @ W_pca

    # Reshape back to the input's leading dimensions, now with n_components last
    im_pca = np.reshape(im_vec_pca,image.shape[:-1]+(im_vec_pca.shape[-1],))

    # Ensure that zero-value input pixels are also zero in the output.
    # im_pca is backed by the freshly computed matmul result, so writing
    # into it does not touch any of the input arrays.
    im_pca[zero_pixel_mask] = 0

    return im_pca

In [7]:
# Read the saved tiles dataset from disk; the pathlib path is handed to
# TensorFlow as a plain string
tiles_dataset_dir = str(tiles_dataset_path)
tiles_data = tf.data.experimental.load(tiles_dataset_dir)

In [8]:
# Run the image of every (image, label) tile pair through the PCA transform;
# the label tiles are passed along unchanged
transformed_pairs = [
    (pca_transform_image(image_tile,W_pca,X_mean,X_std), label_tile)
    for image_tile, label_tile in tqdm.tqdm(tiles_data.as_numpy_iterator())
]
pca_tiles_list = [pca_tile for pca_tile, _ in transformed_pairs]
label_tiles_list = [label_tile for _, label_tile in transformed_pairs]

459it [00:26, 17.05it/s]


In [9]:
# Combine the per-tile arrays into single arrays with the tile index as the
# first axis (one array for the PCA tiles, one for the label tiles)
pca_tiles = np.asarray(pca_tiles_list)
label_tiles = np.asarray(label_tiles_list)

In [10]:
# Build a TensorFlow dataset from the (PCA tiles, label tiles) array pair,
# sliced along the first (tile) axis
tile_arrays = (pca_tiles, label_tiles)
pca_tiles_dataset = tf.data.Dataset.from_tensor_slices(tile_arrays)

In [11]:
# Write the PCA dataset to the configured output location; the pathlib path
# is handed to TensorFlow as a plain string
pca_tiles_output_dir = str(pca_tiles_dataset_path)
tf.data.experimental.save(pca_tiles_dataset, pca_tiles_output_dir)