In [1]:
# Disable TensorFlow debugging info and warnings.
# NOTE(review): presumably this must run before TensorFlow is first imported
# (possibly indirectly through the local modules imported below) — confirm.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # 2: Info and warnings not displayed 

In [2]:
import numpy as np
import sklearn.decomposition
import imblearn
import matplotlib.pyplot as plt
import matplotlib.colors
import pathlib
import tqdm
import annotation, misc, hyspec_cnn, hyspec_io, hyspec_ml  # Local imports

In [10]:
# Paths: every input lives under one dataset root, so derive them from it
dataset_root = pathlib.Path('/media/mha114/Massimal/Vega_Sola/Hyperspectral/20220823/Area')

# Directories
hyspec_base = dataset_root / '2a_Rrs'
train_val_base = dataset_root / '3a_PCA_TrainValidationSplit'
pca_dir = dataset_root / 'M_PCAModel'

# Files
class_json = train_val_base / 'Training/Annotation/label_classes.json'
spectra_file = pca_dir / '20220823_Vega_Sola_ExtractedSpectraAndLabels2023-05-27-01h42m12s.npz'
pca_model_path = pca_dir / '20220823_Vega_Sola_PCA-Model.npz'

In [4]:
# Get wavelengths from an example hyspec file: the first *.hdr file (in sorted
# path order) found anywhere below hyspec_base.
hyspec_header_paths = sorted(hyspec_base.rglob('*.hdr'))
if not hyspec_header_paths:
    # Fail with an explicit message instead of a bare IndexError on [0]
    raise FileNotFoundError(f'No *.hdr files found under {hyspec_base}')
example_hyspec_file_path = hyspec_header_paths[0]

# Only the second returned value (the wavelengths, wl) is kept; the other
# three return values are discarded.
_, wl, _, _ = hyspec_io.load_envi_image(str(example_hyspec_file_path), rgb_only=True)

In [5]:
# Load the 'X' and 'y' arrays from the .npz archive; the context manager
# closes the archive once both arrays have been read.
with np.load(spectra_file) as spectra_archive:
    X_raw, y_raw = spectra_archive['X'], spectra_archive['y']

In [6]:
# Remove zero-valued spectra (caused by saturation): keep only the rows of
# X_raw that have at least one non-zero value along axis 1, and the
# matching entries of y_raw.
nonzero_spectra_mask = np.any(X_raw != 0, axis=1)
X, y = X_raw[nonzero_spectra_mask], y_raw[nonzero_spectra_mask]

In [11]:
# Load class names and indices.
# class_dict is iterated further down as (class name, class index) pairs,
# where the index is compared against the label values in y.
class_dict = annotation.read_hasty_metadata(class_json)

In [13]:
# Inspect number of samples per class.
# Note: despite its name, samples_per_class stores each class's *fraction*
# of all samples in y (count / len(y)), not the raw count.
n_samples_total = len(y)
samples_per_class = {}
for class_name, class_index in class_dict.items():
    # Count once per class; the same value feeds both the printout and the fraction
    n_class_samples = np.count_nonzero(y == class_index)
    print(f'{class_name:35} {n_class_samples} samples')
    samples_per_class[class_name] = n_class_samples / n_samples_total

Rock                                205572 samples
Cobble                              106211 samples
Sand                                245730 samples
Mearl bed                           385290 samples
Rockweed                            139784 samples
Kelp                                69648 samples
Brown algae                         274157 samples


In [14]:
# Display the fraction of all samples that belongs to each class
samples_per_class

{'Rock': 0.14412026988373464,
 'Cobble': 0.0744612981564675,
 'Sand': 0.17227382094122795,
 'Mearl bed': 0.2701150875776084,
 'Rockweed': 0.09799830621596307,
 'Kelp': 0.04882809213736476,
 'Brown algae': 0.19220312508763368}

In [19]:
# Class weight = inverse square root of the class's sample fraction, so
# classes with fewer samples get larger weights.
weights = {
    name: 1 / np.sqrt(fraction)
    for name, fraction in samples_per_class.items()
}

In [26]:
# Mean of the raw class weights, used to normalise them
weight_mean = np.mean(list(weights.values()))

In [27]:
# Normalise the class weights by their mean, so the resulting weights average to 1
bal_weights = {
    name: raw_weight / weight_mean
    for name, raw_weight in weights.items()
}

In [28]:
# Display the mean-normalised class weights
bal_weights

{'Rock': 0.8936593040017538,
 'Cobble': 1.2432807598688147,
 'Sand': 0.8173816269828617,
 'Mearl bed': 0.652769859085942,
 'Rockweed': 1.0837403161669816,
 'Kelp': 1.5353225790820613,
 'Brown algae': 0.7738455548115847}

In [29]:
# Round each normalised class weight to two decimals
bal_weights_rounded = {
    name: round(value, ndigits=2)
    for name, value in bal_weights.items()
}

In [30]:
# Display the class weights rounded to two decimals
bal_weights_rounded

{'Rock': 0.89,
 'Cobble': 1.24,
 'Sand': 0.82,
 'Mearl bed': 0.65,
 'Rockweed': 1.08,
 'Kelp': 1.54,
 'Brown algae': 0.77}