# Setup

In [1]:
import glob
import os

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import image
import seaborn as sns
from scipy.stats.stats import pearsonr
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import normalized_mutual_info_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import mean_absolute_error, accuracy_score, f1_score, balanced_accuracy_score, mean_squared_error, r2_score

import warnings


def warn(*args, **kwargs):
    """Accept any arguments and do nothing."""
    return None


# Rebinding warnings.warn to the no-op above means every later call made
# through `warnings.warn(...)` is dropped for the rest of the session.
warnings.warn = warn

In [2]:
# which datasets to read and write

# One switch per dataset, listed in the order the sections appear below.
# Each flag gates the data-loading cell of its dataset section.
do_plastic_flakes = True
do_salient_objects = True
do_indian_pines = True
do_soil_moisture = True
do_foods = True


## Define functions

In [3]:
## dataset loading

def load_datasets(Dataset):
    """Load the saved imagery and ground-truth arrays for one dataset.

    Files are read from ``../data/<folder>/hyperspectral_imagery/*npy`` and
    ``../data/<folder>/gt_labels/*npy`` (relative to the working directory).

    Parameters
    ----------
    Dataset : str
        One of ``'SM'``, ``'IN'``, ``'SO'``, ``'PF'`` or ``'Foods'``.

    Returns
    -------
    tuple
        For ``'SM'`` and ``'IN'``: ``(hyper, gt)``, the first imagery file and
        the first label file as arrays.
        For ``'SO'``, ``'PF'`` and ``'Foods'``: ``(hypers, gt_labels)``, two
        lists with one array per imagery file; the i-th label file is paired
        with the i-th imagery file.

    Raises
    ------
    ValueError
        If ``Dataset`` is not one of the known keys.
    IndexError
        If a required file is missing (no match for a single-file dataset, or
        fewer label files than imagery files).
    """
    folders = {
        'SM': 'soil_moisture',
        'IN': 'indian_pines',
        'SO': 'salient_objects',
        'PF': 'plastic_flakes',
        'Foods': 'foods',
    }
    single_file_datasets = ('SM', 'IN')

    if Dataset not in folders:
        raise ValueError(
            f'Unknown dataset {Dataset!r}; expected one of {sorted(folders)}')

    hyper_path = '../data/' + folders[Dataset] + '/hyperspectral_imagery/*npy'
    gt_path = '../data/' + folders[Dataset] + '/gt_labels/*npy'

    # glob returns matches in arbitrary order. Sorting both listings makes the
    # index-based image/label pairing below follow the file names instead of
    # whatever order the filesystem happens to report.
    hyper_files = sorted(glob.glob(hyper_path))
    gt_files = sorted(glob.glob(gt_path))

    if Dataset in single_file_datasets:
        return np.load(hyper_files[0]), np.load(gt_files[0])

    hypers = [np.load(hyper_file) for hyper_file in hyper_files]
    # Indexing (rather than zip) keeps the IndexError when a label file is missing.
    gt_labels = [np.load(gt_files[i]) for i in range(len(hyper_files))]
    return hypers, gt_labels

In [4]:
def normalize(data, mean, std):
    """Return ``data`` shifted by ``mean`` and then divided by ``std``."""
    centered = data - mean
    return centered / std


def baseline_supervised_pass(data_fpath, labels_fpath, Dataset, random_subset=False, num_random_bands=15):
    """Fit a baseline linear model on one saved dataset and score it on a hold-out split.

    The data are split 70/30 with ``random_state=42``, both parts are
    normalised with a single mean and standard deviation computed over all
    training values, and the model is evaluated on the validation part.

    Parameters
    ----------
    data_fpath : str
        Path to a ``.npy`` file holding a 2-D array whose columns are treated
        as bands.
    labels_fpath : str
        Path to a ``.npy`` file with the targets for the rows of the data array.
    Dataset : str
        ``'SM'`` runs the regression pipeline (``LinearRegression``, plain
        split). Any other value runs the classification pipeline
        (``LogisticRegression``, split stratified on the labels).
    random_subset : bool, optional
        When true, only a random selection of columns is kept before fitting.
    num_random_bands : int, optional
        How many columns to keep when ``random_subset`` is true. Columns are
        drawn without replacement, so the kept bands are distinct; a request
        larger than the number of available columns keeps all of them.

    Returns
    -------
    tuple of float
        ``(mse, mae, r2)`` when ``Dataset == 'SM'``, otherwise
        ``(acc, bac, f1)`` with a macro-averaged F1 score.
    """
    data = np.load(data_fpath)
    if random_subset:
        num_bands = data.shape[1]
        # replace=False: sampling with replacement could pick the same band
        # several times and silently shrink the effective subset.
        indices = np.random.choice(num_bands,
                                   size=min(num_random_bands, num_bands),
                                   replace=False)
        data = data[:, indices]

    labels = np.load(labels_fpath)

    is_regression = Dataset == 'SM'

    # train val split (stratified only for the classification datasets)
    train_images, val_images, train_labels, val_labels = train_test_split(
        data,
        labels,
        test_size=0.3,
        random_state=42,
        stratify=None if is_regression else labels)

    # Statistics come from the training part only and are reused for the
    # validation part, so no validation information leaks into the scaling.
    train_mu = np.mean(train_images)
    train_std = np.std(train_images)
    train_images = normalize(train_images, train_mu, train_std)
    val_images = normalize(val_images, train_mu, train_std)

    if is_regression:
        clf = LinearRegression()
    else:
        clf = LogisticRegression(multi_class='multinomial')

    clf.fit(train_images, train_labels)
    val_predictions = clf.predict(val_images)

    if is_regression:
        mse = mean_squared_error(val_labels, val_predictions)
        mae = mean_absolute_error(val_labels, val_predictions)
        r2 = r2_score(val_labels, val_predictions)
        return mse, mae, r2

    acc = accuracy_score(val_labels, val_predictions)
    bac = balanced_accuracy_score(val_labels, val_predictions)
    f1 = f1_score(val_labels, val_predictions, average='macro')
    return acc, bac, f1
    

## Plastic flakes dataset

In [5]:
# stacks all images vertically

# load data

if do_plastic_flakes:

    # load_datasets returns one array per file; np.array stacks them into a
    # single array with a leading per-image axis.
    hyper, gt = load_datasets('PF')
    hyper, gt = np.array(hyper), np.array(gt)

    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)

    # Merge the first two axes so all images sit on top of each other. A
    # C-order reshape yields the same row layout as copying image by image;
    # astype(float) keeps the float64 dtype of the previous np.empty buffers.
    hyper_multiple = hyper.reshape(hyper.shape[0]*hyper.shape[1], hyper.shape[-1]).astype(float)
    gt_multiple = gt.reshape(gt.shape[0]*gt.shape[1]).astype(float)

    print('The shape of the vertically stacked images:', hyper_multiple.shape)
    print('The shape of the vertically stacked labels:', gt_multiple.shape)



Dataset info...
The shape of the original imagery: (11, 112128, 224)
The shape of the original labels: (11, 112128)
The shape of the vertically stacked images: (1233408, 224)
The shape of the vertically stacked images: (1233408,)


In [6]:
# rewards
"""    
if do_plastic_flakes:
    
    num_runs = 25
    
    # randomly sample hyper_multiple for 5% of the pixels
    indices = np.random.randint(0, hyper_multiple.shape[0], int(hyper_multiple.shape[0]*0.05))
    hyper_multiple = hyper_multiple[indices, :]
    print('The shape of the sub-sampled vertically stacked images:', hyper_multiple.shape)
    
    correlations = []
    for i in range(num_runs):
        correlations.append(calculate_correlations(hyper_multiple, num_bands_originally=hyper_multiple.shape[-1], num_bands_kept=30))
    print(f'\nCorrelation reward for random 30 bands, x{num_runs} runs:', np.mean(correlations))
    
    mis = []
    for i in range(num_runs):
        mis.append(calculate_mutual_infos(hyper_multiple, num_bands_originally=hyper_multiple.shape[-1], num_bands_kept=30))
    print(f'Normalized mutual information reward for random 30 bands, x{num_runs} runs:', np.mean(mis))
    
    # plot rewards
    a_string = ['pearson correlation (cumulative avg)'] * len(correlations)    
    b_string = ['normalized mutual information (cumulative avg)'] * len(mis)
    strings = a_string + b_string
    pd_df = pd.DataFrame([correlations+mis, strings]).T
    pd_df[0] = pd_df[0].astype(float, copy=True)
    pd_df.columns = ['Reward Metric']
    sns.histplot(data=pd_df, bins=15, x=0, hue=1, kde=True)
    plt.title(f'Test', fontsize=17)
    #plt.xlim([0,1])
    plt.show()
    plt.figure()
    """

"    \nif do_plastic_flakes:\n    \n    num_runs = 25\n    \n    # randomly sample hyper_multiple for 5% of the pixels\n    indices = np.random.randint(0, hyper_multiple.shape[0], int(hyper_multiple.shape[0]*0.05))\n    hyper_multiple = hyper_multiple[indices, :]\n    print('The shape of the sub-sampled vertically stacked images:', hyper_multiple.shape)\n    \n    correlations = []\n    for i in range(num_runs):\n        correlations.append(calculate_correlations(hyper_multiple, num_bands_originally=hyper_multiple.shape[-1], num_bands_kept=30))\n    print(f'\nCorrelation reward for random 30 bands, x{num_runs} runs:', np.mean(correlations))\n    \n    mis = []\n    for i in range(num_runs):\n        mis.append(calculate_mutual_infos(hyper_multiple, num_bands_originally=hyper_multiple.shape[-1], num_bands_kept=30))\n    print(f'Normalized mutual information reward for random 30 bands, x{num_runs} runs:', np.mean(mis))\n    \n    # plot rewards\n    a_string = ['pearson correlation (cumu

In [7]:
# baseline models

data_path = '../data/plastic_flakes/'
# Sorted so the images are processed in a stable, name-based order.
hsi_paths = sorted(glob.glob(data_path + 'hyperspectral_imagery/*.npy'))

# Every label file carries the same base name as its image. os.path is used
# to take the name apart so that dots in the file name or a different path
# prefix/separator cannot break the pairing.
gt_paths = [
    data_path + f'gt_labels/{os.path.splitext(os.path.basename(hsi_path))[0]}.npy'
    for hsi_path in hsi_paths
]

num_random_bands = 30

# model with all bands included

accs, bacs, f1s = [], [], []
for hsi_path, gt_path in zip(hsi_paths, gt_paths):
    acc, bac, f1 = baseline_supervised_pass(hsi_path, gt_path, 'PF')
    accs.append(acc)
    bacs.append(bac)
    f1s.append(f1)

print('Baseline model performance on rescaled data (ints) with all bands...')
print(f'Validation Accuracy: {np.mean(accs)}')
print(f'Validation Balanced Accuracy: {np.mean(bacs)}')
print(f'Validation Macro Averaged F1 Score: {np.mean(f1s)}')

# model with random bands (a fresh random subset is drawn for every image)

accs, bacs, f1s = [], [], []
for hsi_path, gt_path in zip(hsi_paths, gt_paths):
    acc, bac, f1 = baseline_supervised_pass(hsi_path, gt_path, 'PF', True, num_random_bands)
    accs.append(acc)
    bacs.append(bac)
    f1s.append(f1)

print(f'\nBaseline model performance on rescaled data (ints) with {num_random_bands} band random subset...')
print(f'Validation Accuracy: {np.mean(accs)}')
print(f'Validation Balanced Accuracy: {np.mean(bacs)}')
print(f'Validation Macro Averaged F1 Score: {np.mean(f1s)}')

Baseline model performance on rescaled data (ints) with all bands...
Validation Accuracy: 0.9713617040826531
Validation Balanced Accuracy: 0.9548121247085622
Validation Macro Averaged F1 Score: 0.9530316150340309

Baseline model performance on rescaled data (ints) with 30 band random subset...
Validation Accuracy: 0.958792418972567
Validation Balanced Accuracy: 0.9316408124709102
Validation Macro Averaged F1 Score: 0.9315067333001887


## Salient objects dataset

In [8]:
# stacks all images vertically

# load data

if do_salient_objects:

    # load_datasets returns one array per file; np.array stacks them into a
    # single array with a leading per-image axis.
    hyper, gt = load_datasets('SO')
    hyper, gt = np.array(hyper), np.array(gt)

    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)

    # Merge the first two axes so all images sit on top of each other. A
    # C-order reshape yields the same row layout as copying image by image;
    # astype(float) keeps the float64 dtype of the previous np.empty buffers.
    hyper_multiple = hyper.reshape(hyper.shape[0]*hyper.shape[1], hyper.shape[-1]).astype(float)
    gt_multiple = gt.reshape(gt.shape[0]*gt.shape[1]).astype(float)

    print('\nDataset info...')
    print('The shape of the vertically stacked images:', hyper_multiple.shape)
    print('The shape of the vertically stacked labels:', gt_multiple.shape)

    # randomly sample .1% of the pixels (row indices are drawn with replacement)
    indices = np.random.randint(0, hyper_multiple.shape[0], int(hyper_multiple.shape[0]*0.001))
    hyper_multiple = hyper_multiple[indices, :]
    # Apply the same draw to the labels so they stay aligned with the pixels.
    gt_multiple = gt_multiple[indices]
    print('The shape of the sub-sampled vertically stacked images:', hyper_multiple.shape)



Dataset info...
The shape of the original imagery: (60, 786432, 81)
The shape of the original labels: (60, 786432)

Dataset info...
The shape of the vertically stacked images: (47185920, 81)
The shape of the vertically stacked images: (47185920,)
The shape of the sub-sampled vertically stacked images: (47185, 81)


In [9]:
# rewards
"""    
if do_salient_objects:
    
    num_runs = 25
    
    correlations = []
    for i in range(num_runs):
        correlations.append(calculate_correlations(hyper_multiple, num_bands_originally=hyper_multiple.shape[-1], num_bands_kept=30))
    print(f'\nCorrelation reward for random 30 bands, x{num_runs} runs:', np.mean(correlations))
    
    mis = []
    for i in range(num_runs):
        mis.append(calculate_mutual_infos(hyper_multiple, num_bands_originally=hyper_multiple.shape[-1], num_bands_kept=30))
    print(f'Normalized mutual information reward for random 30 bands, x{num_runs} runs:', np.mean(mis))
    
    # plot rewards
    a_string = ['pearson correlation (cumulative avg)'] * len(correlations)    
    b_string = ['normalized mutual information (cumulative avg)'] * len(mis)
    strings = a_string + b_string
    pd_df = pd.DataFrame([correlations+mis, strings]).T
    pd_df[0] = pd_df[0].astype(float, copy=True)
    pd_df.columns = ['Reward Metric']
    sns.histplot(data=pd_df, bins=20, x=0, hue=1, kde=True)
    plt.title(f'Test', fontsize=17)
    #plt.xlim([0,1])
    plt.show()
    plt.figure()
"""

"    \nif do_salient_objects:\n    \n    num_runs = 25\n    \n    correlations = []\n    for i in range(num_runs):\n        correlations.append(calculate_correlations(hyper_multiple, num_bands_originally=hyper_multiple.shape[-1], num_bands_kept=30))\n    print(f'\nCorrelation reward for random 30 bands, x{num_runs} runs:', np.mean(correlations))\n    \n    mis = []\n    for i in range(num_runs):\n        mis.append(calculate_mutual_infos(hyper_multiple, num_bands_originally=hyper_multiple.shape[-1], num_bands_kept=30))\n    print(f'Normalized mutual information reward for random 30 bands, x{num_runs} runs:', np.mean(mis))\n    \n    # plot rewards\n    a_string = ['pearson correlation (cumulative avg)'] * len(correlations)    \n    b_string = ['normalized mutual information (cumulative avg)'] * len(mis)\n    strings = a_string + b_string\n    pd_df = pd.DataFrame([correlations+mis, strings]).T\n    pd_df[0] = pd_df[0].astype(float, copy=True)\n    pd_df.columns = ['Reward Metric']\n   

In [10]:
# baseline models

data_path = '../data/salient_objects/'
# Sorted so the images are processed in a stable, name-based order.
hsi_paths = sorted(glob.glob(data_path + 'hyperspectral_imagery/*.npy'))

# Every label file carries the same base name as its image. os.path is used
# to take the name apart so that dots in the file name or a different path
# prefix/separator cannot break the pairing.
gt_paths = [
    data_path + f'gt_labels/{os.path.splitext(os.path.basename(hsi_path))[0]}.npy'
    for hsi_path in hsi_paths
]

num_random_bands = 30

# model with all bands included

accs, bacs, f1s = [], [], []
for hsi_path, gt_path in zip(hsi_paths, gt_paths):
    acc, bac, f1 = baseline_supervised_pass(hsi_path, gt_path, 'SO')
    accs.append(acc)
    bacs.append(bac)
    f1s.append(f1)

print('Baseline model performance on rescaled data (ints) with all bands...')
print(f'Validation Accuracy: {np.mean(accs)}')
print(f'Validation Balanced Accuracy: {np.mean(bacs)}')
print(f'Validation Macro Averaged F1 Score: {np.mean(f1s)}')

# model with random bands (a fresh random subset is drawn for every image)

accs, bacs, f1s = [], [], []
for hsi_path, gt_path in zip(hsi_paths, gt_paths):
    acc, bac, f1 = baseline_supervised_pass(hsi_path, gt_path, 'SO', True, num_random_bands)
    accs.append(acc)
    bacs.append(bac)
    f1s.append(f1)

print(f'\nBaseline model performance on rescaled data (ints) with {num_random_bands} band random subset...')
print(f'Validation Accuracy: {np.mean(accs)}')
print(f'Validation Balanced Accuracy: {np.mean(bacs)}')
print(f'Validation Macro Averaged F1 Score: {np.mean(f1s)}')

Baseline model performance on rescaled data (ints) with all bands...
Validation Accuracy: 0.9348171067689569
Validation Balanced Accuracy: 0.6789592114043078
Validation Macro Averaged F1 Score: 0.7062977883143245

Baseline model performance on rescaled data (ints) with 30 band random subset...
Validation Accuracy: 0.924376015484819
Validation Balanced Accuracy: 0.6157260357618256
Validation Macro Averaged F1 Score: 0.6314979792421563


In [None]:
## Plastic flakes dataset
# NOTE(review): this cell has no execution count and relies on names that are
# not defined anywhere in the visible notebook: calculate_correlations,
# calculate_mutual_infos, selected_bands and do_supervised_modeling. It looks
# like it was carried over from a companion notebook that evaluates bands
# chosen by an RL agent -- confirm where those names come from before running.
# On a fresh kernel it would presumably stop with a NameError at the first
# calculate_correlations call.
#
# For each dataset the cell follows the same three steps: load the arrays,
# print two reward values for selected_bands, and (when
# do_supervised_modeling is set) score a supervised model.
# stacks all images vertically

# load data

if do_plastic_flakes:
    
    hyper, gt = load_datasets(
        'PF')
    
    hyper, gt = np.array(hyper), np.array(gt)
    
    hyper_multiple = np.empty([hyper.shape[0]*hyper.shape[1], hyper.shape[-1]])
    gt_multiple = np.empty([gt.shape[0]*gt.shape[1]])
    
    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)
    
    # Copy image i into rows [i*P, (i+1)*P) of the stacked arrays, where P is hyper.shape[1].
    for i in range(hyper.shape[0]):
        hyper_multiple[i*hyper.shape[1]:(i+1)*hyper.shape[1] , :] = hyper[i, :, :]
        gt_multiple[i*hyper.shape[1]:(i+1)*hyper.shape[1]] = gt[i, :]

    print('The shape of the vertically stacked images:', hyper_multiple.shape)
    # NOTE(review): this line prints the shape of the stacked labels, although the message says "images".
    print('The shape of the vertically stacked images:', gt_multiple.shape)    

# rewards
    
if do_plastic_flakes:
    # NOTE(review): the un-stacked `hyper` is passed here, not `hyper_multiple` -- confirm which one is intended.
    correlation = calculate_correlations(hyper, selected_bands)
    print(f'\nCorrelation:', correlation)
    mi = calculate_mutual_infos(hyper, selected_bands)
    print(f'Normalized mutual information:', mi)

if do_plastic_flakes and do_supervised_modeling:

    # baseline models
    
    # NOTE(review): the executed cells above read from '../data/plastic_flakes/'; this path has no 'data/' component -- verify.
    data_path = '../plastic_flakes/'
    hsi_paths = glob.glob(data_path + 'hyperspectral_imagery/*.npy')
    gt_paths = []
    # Build each label path from the image file's base name.
    for i in range(len(hsi_paths)):
         num = hsi_paths[i].split('.')[2].split('/')[-1]
         gt_paths.append(data_path + f'gt_labels/{num}.npy')
    
    
    accs, bacs, f1s = [], [], []
    for i in range(0, len(hsi_paths)):
    
        # NOTE(review): selected_bands lands in the 4th positional parameter, which the
        # baseline_supervised_pass defined in this notebook names random_subset -- confirm
        # the signature this cell was written against.
        acc, bac, f1 = baseline_supervised_pass(hsi_paths[i], gt_paths[i], 'PF', selected_bands)
        accs.append(acc)
        bacs.append(bac)
        f1s.append(f1)
    
    print(f'\nModel performance with RL agent selected bands...')
    print(f'Validation Accuracy: {np.mean(accs)}')
    print(f'Validation Balanced Accuracy: {np.mean(bacs)}')
    print(f'Validation Macro Averaged F1 Score: {np.mean(f1s)}')

## Salient objects dataset
# stacks all images vertically

# load data

if do_salient_objects:
    
    hyper, gt = load_datasets(
        'SO')
    
    hyper, gt = np.array(hyper), np.array(gt)
    
    hyper_multiple = np.empty([hyper.shape[0]*hyper.shape[1], hyper.shape[-1]])
    gt_multiple = np.empty([gt.shape[0]*gt.shape[1]])
    
    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)
    
    # Copy image i into rows [i*P, (i+1)*P) of the stacked arrays, where P is hyper.shape[1].
    for i in range(hyper.shape[0]):
        hyper_multiple[i*hyper.shape[1]:(i+1)*hyper.shape[1] , :] = hyper[i, :, :]
        gt_multiple[i*hyper.shape[1]:(i+1)*hyper.shape[1]] = gt[i, :]

    print('\nDataset info...')
    print('The shape of the vertically stacked images:', hyper_multiple.shape)
    # NOTE(review): this line prints the shape of the stacked labels, although the message says "images".
    print('The shape of the vertically stacked images:', gt_multiple.shape)    

# rewards
    
if do_salient_objects:
    # NOTE(review): the un-stacked `hyper` is passed here, not `hyper_multiple` -- confirm which one is intended.
    correlation = calculate_correlations(hyper, selected_bands)
    print(f'\nCorrelation:', correlation)
    mi = calculate_mutual_infos(hyper, selected_bands)
    print(f'Normalized mutual information:', mi)


if do_salient_objects and do_supervised_modeling:
    # baseline models
    
    # NOTE(review): the executed cells above read from '../data/salient_objects/'; this path has no 'data/' component -- verify.
    data_path = '../salient_objects/'
    hsi_paths = glob.glob(data_path + 'hyperspectral_imagery/*.npy')
    gt_paths = []
    # Build each label path from the image file's base name.
    for i in range(len(hsi_paths)):
         num = hsi_paths[i].split('.')[2].split('/')[-1]
         gt_paths.append(data_path + f'gt_labels/{num}.npy')
    
    
    #print(hsi_paths)
    #print(gt_paths)
    
    accs, bacs, f1s = [], [], []
    for i in range(0, len(hsi_paths)):
    
        # NOTE(review): selected_bands is passed in the random_subset position -- see the note in the plastic flakes section.
        acc, bac, f1 = baseline_supervised_pass(hsi_paths[i], gt_paths[i], 'SO', selected_bands)
        accs.append(acc)
        bacs.append(bac)
        f1s.append(f1)
    
    print(f'\nModel performance with RL agent selected bands...')
    print(f'Validation Accuracy: {np.mean(accs)}')
    print(f'Validation Balanced Accuracy: {np.mean(bacs)}')
    print(f'Validation Macro Averaged F1 Score: {np.mean(f1s)}')

## Indian Pines dataset
# load data

if do_indian_pines:
    
    hyper, gt = load_datasets(
        'IN')
    
    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)
    
# rewards
    
if do_indian_pines:
    correlation = calculate_correlations(hyper, selected_bands)
    print(f'\nCorrelation:', correlation)
    mi = calculate_mutual_infos(hyper, selected_bands)
    print(f'Normalized mutual information:', mi)

if do_indian_pines and do_supervised_modeling:


    # baseline models
    
    # NOTE(review): the executed cells below read from '../data/indian_pines/...'; these paths have no 'data/' component -- verify.
    data_fpath = '../indian_pines/hyperspectral_imagery/indian_pines_corrected.npy'
    labels_fpath = '../indian_pines/gt_labels/indian_pines_gt.npy'
    
    # model scored with selected_bands (passed in the random_subset position -- see the note in the plastic flakes section)
    print(f'\nModel performance with RL agent selected bands...')
    acc, bac, f1 = baseline_supervised_pass(data_fpath, labels_fpath, 'IN', selected_bands)
    print(f'Validation Accuracy: {acc}')
    print(f'Validation Balanced Accuracy: {bac}')
    print(f'Validation Macro Averaged F1 Score: {f1}')
    ## Soil moisture dataset
    # load data

if do_soil_moisture:
    
    hyper, gt = load_datasets(
        'SM')
    
    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)
    
# rewards
    
if do_soil_moisture:    
    correlation = calculate_correlations(hyper, selected_bands)
    print(f'\nCorrelation:', correlation)
    mi = calculate_mutual_infos(hyper, selected_bands)
    print(f'Normalized mutual information:', mi)

    
if do_soil_moisture and do_supervised_modeling:

    # baseline models 
    
    # NOTE(review): the executed cells below read from '../data/soil_moisture/...'; these paths have no 'data/' component -- verify.
    data_fpath = '../soil_moisture/hyperspectral_imagery/soil_moisture_hyper.npy'
    labels_fpath = '../soil_moisture/gt_labels/soil_moisture_gt.npy'
    
    print(f'\nModel performance with RL agent selected bands...')
    
    # 'SM' is the regression dataset, so three regression metrics come back.
    mse, mae, r2 = baseline_supervised_pass(data_fpath, labels_fpath, 'SM', selected_bands)
    print(f'Validation MSE: {mse}')
    print(f'Validation MAE: {mae}')
    print(f'Validation r2: {r2}')

## Foods dataset

# load data

if do_foods:
    
    hyper, gt = load_datasets(
        'Foods')

    # Only the first image/label pair of the returned lists is used.
    hyper, gt = hyper[0], gt[0]
    
    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)
    
# rewards
    
if do_foods:
    correlation = calculate_correlations(hyper, selected_bands)
    print(f'\nCorrelation:', correlation)
    mi = calculate_mutual_infos(hyper, selected_bands)
    print(f'Normalized mutual information:', mi)

if do_foods and do_supervised_modeling:


    # baseline models
    
    # NOTE(review): the executed cells below read from '../data/foods/...'; these paths have no 'data/' component -- verify.
    data_fpath = '../foods/hyperspectral_imagery/foods_hyper.npy'
    labels_fpath = '../foods/gt_labels/foods_gt.npy'
    
    # model scored with selected_bands
    print(f'\nModel performance with RL agent selected bands...')
    
    # NOTE(review): the message below announces a 30-band random subset, but the call
    # that follows passes selected_bands -- the message looks like a copy-paste leftover.
    print(f'\nBaseline model performance on rescaled data (ints) with {30} band random subset...')
    acc, bac, f1 = baseline_supervised_pass(data_fpath, labels_fpath, 'Foods', selected_bands)
    print(f'Validation Accuracy: {acc}')
    print(f'Validation Balanced Accuracy: {bac}')
    print(f'Validation Macro Averaged F1 Score: {f1}')




Dataset info...
The shape of the original imagery: (679, 125)
The shape of the original labels: (679,)

Correlation: 0.957538007779195
Normalized mutual information: 0.6677776488857733

Model performance with RL agent selected bands...
Validation MSE: 2.1394362621755927
Validation MAE: 1.1637981283482837
Validation r2: 0.848383500398763


## Indian Pines dataset

In [11]:
# load data

if do_indian_pines:
    # 'IN' comes back as a single (imagery, labels) pair of arrays.
    hyper, gt = load_datasets('IN')

    # One print call, one line per item -- same text as three separate prints.
    print('\nDataset info...',
          f'The shape of the original imagery: {hyper.shape}',
          f'The shape of the original labels: {gt.shape}',
          sep='\n')
    


Dataset info...
The shape of the original imagery: (10249, 200)
The shape of the original labels: (10249,)


In [12]:
# rewards
"""    
if do_indian_pines:
    num_runs = 25
    
    correlations = []
    for i in range(num_runs):
        correlations.append(calculate_correlations(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))
    print(f'\nCorrelation reward for random 30 bands, x{num_runs} runs:', np.mean(correlations))
    
    mis = []
    for i in range(num_runs):
        mis.append(calculate_mutual_infos(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))
    print(f'Normalized mutual information reward for random 30 bands, x{num_runs} runs:', np.mean(mis))
    
    # plot rewards
    a_string = ['pearson correlation (cumulative avg)'] * len(correlations)    
    b_string = ['normalized mutual information (cumulative avg)'] * len(mis)
    strings = a_string + b_string
    pd_df = pd.DataFrame([correlations+mis, strings]).T
    pd_df[0] = pd_df[0].astype(float, copy=True)
    pd_df.columns = ['Reward Metric']

    sns.histplot(data=pd_df, bins=20, x=0, hue=1, kde=True)
    plt.title(f'Test', fontsize=17)
    #plt.xlim([0,1])
    plt.show()
    plt.figure()
"""

"    \nif do_indian_pines:\n    num_runs = 25\n    \n    correlations = []\n    for i in range(num_runs):\n        correlations.append(calculate_correlations(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))\n    print(f'\nCorrelation reward for random 30 bands, x{num_runs} runs:', np.mean(correlations))\n    \n    mis = []\n    for i in range(num_runs):\n        mis.append(calculate_mutual_infos(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))\n    print(f'Normalized mutual information reward for random 30 bands, x{num_runs} runs:', np.mean(mis))\n    \n    # plot rewards\n    a_string = ['pearson correlation (cumulative avg)'] * len(correlations)    \n    b_string = ['normalized mutual information (cumulative avg)'] * len(mis)\n    strings = a_string + b_string\n    pd_df = pd.DataFrame([correlations+mis, strings]).T\n    pd_df[0] = pd_df[0].astype(float, copy=True)\n    pd_df.columns = ['Reward Metric']\n\n    sns.histplot(data=pd_df, bins=20, x=0, hue

In [13]:
# baseline models

data_fpath = '../data/indian_pines/hyperspectral_imagery/indian_pines_corrected.npy'
labels_fpath = '../data/indian_pines/gt_labels/indian_pines_gt.npy'

# Run 1: every band is used.
print(f'Baseline model performance on rescaled data (ints) with all bands...')
acc, bac, f1 = baseline_supervised_pass(data_fpath, labels_fpath, Dataset='IN')
print(f'Validation Accuracy: {acc}',
      f'Validation Balanced Accuracy: {bac}',
      f'Validation Macro Averaged F1 Score: {f1}',
      sep='\n')

# Run 2: a random selection of 30 bands.
print(f'\nBaseline model performance on rescaled data (ints) with {30} band random subset...')
acc, bac, f1 = baseline_supervised_pass(
    data_fpath, labels_fpath, Dataset='IN', random_subset=True, num_random_bands=30)
print(f'Validation Accuracy: {acc}',
      f'Validation Balanced Accuracy: {bac}',
      f'Validation Macro Averaged F1 Score: {f1}',
      sep='\n')

Baseline model performance on rescaled data (ints) with all bands...
Validation Accuracy: 0.26504065040650404
Validation Balanced Accuracy: 0.08574691943516832
Validation Macro Averaged F1 Score: 0.06477141245017229

Baseline model performance on rescaled data (ints) with 30 band random subset...
Validation Accuracy: 0.24813008130081302
Validation Balanced Accuracy: 0.07261958833153297
Validation Macro Averaged F1 Score: 0.04460649329063645


## Soil moisture dataset

In [14]:
# load data

if do_soil_moisture:
    # 'SM' comes back as a single (imagery, labels) pair of arrays.
    hyper, gt = load_datasets('SM')

    # One print call, one line per item -- same text as three separate prints.
    print('\nDataset info...',
          f'The shape of the original imagery: {hyper.shape}',
          f'The shape of the original labels: {gt.shape}',
          sep='\n')
    


Dataset info...
The shape of the original imagery: (679, 125)
The shape of the original labels: (679,)


In [15]:
# rewards
"""
if do_soil_moisture:
    num_runs = 25
    
    correlations = []
    for i in range(num_runs):
        correlations.append(calculate_correlations(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))
    print(f'\nCorrelation reward for random 30 bands, x{num_runs} runs:', np.mean(correlations))
    
    mis = []
    for i in range(num_runs):
        mis.append(calculate_mutual_infos(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))
    print(f'Normalized mutual information reward for random 30 bands, x{num_runs} runs:', np.mean(mis))
    
    # plot rewards
    a_string = ['pearson correlation (cumulative avg)'] * len(correlations)    
    b_string = ['normalized mutual information (cumulative avg)'] * len(mis)
    strings = a_string + b_string
    pd_df = pd.DataFrame([correlations+mis, strings]).T
    pd_df[0] = pd_df[0].astype(float, copy=True)
    pd_df.columns = ['Reward Metric']
    sns.histplot(data=pd_df, binwidth=0.008, x=0, hue=1, kde=True)
    plt.title(f'Test', fontsize=17)
    #plt.xlim([0,1])
    plt.show()
    plt.figure()
"""    

"\nif do_soil_moisture:\n    num_runs = 25\n    \n    correlations = []\n    for i in range(num_runs):\n        correlations.append(calculate_correlations(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))\n    print(f'\nCorrelation reward for random 30 bands, x{num_runs} runs:', np.mean(correlations))\n    \n    mis = []\n    for i in range(num_runs):\n        mis.append(calculate_mutual_infos(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))\n    print(f'Normalized mutual information reward for random 30 bands, x{num_runs} runs:', np.mean(mis))\n    \n    # plot rewards\n    a_string = ['pearson correlation (cumulative avg)'] * len(correlations)    \n    b_string = ['normalized mutual information (cumulative avg)'] * len(mis)\n    strings = a_string + b_string\n    pd_df = pd.DataFrame([correlations+mis, strings]).T\n    pd_df[0] = pd_df[0].astype(float, copy=True)\n    pd_df.columns = ['Reward Metric']\n    sns.histplot(data=pd_df, binwidth=0.008, x=0, h

In [16]:
# baseline models 

data_fpath = '../data/soil_moisture/hyperspectral_imagery/soil_moisture_hyper.npy'
labels_fpath = '../data/soil_moisture/gt_labels/soil_moisture_gt.npy'

# Run 1: every band is used. 'SM' is the regression dataset, so the three
# returned values are MSE, MAE and R^2.
print(f'Baseline model performance on rescaled data (ints) with all bands...')
mse, mae, r2 = baseline_supervised_pass(data_fpath, labels_fpath, Dataset='SM')
print(f'Validation MSE: {mse}',
      f'Validation MAE: {mae}',
      f'Validation r2: {r2}',
      sep='\n')

# Run 2: a random selection of 30 bands.
print(f'\nBaseline model performance on rescaled data (ints) with {30} band random subset...')
mse, mae, r2 = baseline_supervised_pass(
    data_fpath, labels_fpath, Dataset='SM', random_subset=True, num_random_bands=30)
print(f'Validation MSE: {mse}',
      f'Validation MAE: {mae}',
      f'Validation r2: {r2}',
      sep='\n')

Baseline model performance on rescaled data (ints) with all bands...
Validation MSE: 2.207357810591
Validation MAE: 1.1874092003943846
Validation r2: 0.8435700700571795

Baseline model performance on rescaled data (ints) with 30 band random subset...
Validation MSE: 3.2095624547610444
Validation MAE: 1.353479616841915
Validation r2: 0.7725463322999038


## Foods dataset


In [17]:
# load data

if do_foods:
    # 'Foods' comes back as two lists (imagery, labels); keep the first entry of each.
    hyper, gt = (arrays[0] for arrays in load_datasets('Foods'))

    # One print call, one line per item -- same text as three separate prints.
    print('\nDataset info...',
          f'The shape of the original imagery: {hyper.shape}',
          f'The shape of the original labels: {gt.shape}',
          sep='\n')
    
# rewards
"""    
if do_foods:
    num_runs = 25
    
    correlations = []
    for i in range(num_runs):
        correlations.append(calculate_correlations(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))
    print(f'\nCorrelation reward for random 30 bands, x{num_runs} runs:', np.mean(correlations))
    
    mis = []
    for i in range(num_runs):
        mis.append(calculate_mutual_infos(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))
    print(f'Normalized mutual information reward for random 30 bands, x{num_runs} runs:', np.mean(mis))
    
    # plot rewards
    a_string = ['pearson correlation (cumulative avg)'] * len(correlations)    
    b_string = ['normalized mutual information (cumulative avg)'] * len(mis)
    strings = a_string + b_string
    pd_df = pd.DataFrame([correlations+mis, strings]).T
    pd_df[0] = pd_df[0].astype(float, copy=True)
    pd_df.columns = ['Reward Metric']
    sns.histplot(data=pd_df, bins=20, x=0, hue=1, kde=True)
    plt.title(f'Test', fontsize=17)
    #plt.xlim([0,1])
    plt.show()
    plt.figure()
"""


Dataset info...
The shape of the original imagery: (2400, 96)
The shape of the original labels: (2400,)


"    \nif do_foods:\n    num_runs = 25\n    \n    correlations = []\n    for i in range(num_runs):\n        correlations.append(calculate_correlations(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))\n    print(f'\nCorrelation reward for random 30 bands, x{num_runs} runs:', np.mean(correlations))\n    \n    mis = []\n    for i in range(num_runs):\n        mis.append(calculate_mutual_infos(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=30))\n    print(f'Normalized mutual information reward for random 30 bands, x{num_runs} runs:', np.mean(mis))\n    \n    # plot rewards\n    a_string = ['pearson correlation (cumulative avg)'] * len(correlations)    \n    b_string = ['normalized mutual information (cumulative avg)'] * len(mis)\n    strings = a_string + b_string\n    pd_df = pd.DataFrame([correlations+mis, strings]).T\n    pd_df[0] = pd_df[0].astype(float, copy=True)\n    pd_df.columns = ['Reward Metric']\n    sns.histplot(data=pd_df, bins=20, x=0, hue=1, kde=T

In [18]:

# baseline models

data_fpath = '../data/foods/hyperspectral_imagery/foods_hyper.npy'
labels_fpath = '../data/foods/gt_labels/foods_gt.npy'

# Run 1: every band is used.
print(f'Baseline model performance on rescaled data (ints) with all bands...')
acc, bac, f1 = baseline_supervised_pass(data_fpath, labels_fpath, Dataset='Foods')
print(f'Validation Accuracy: {acc}',
      f'Validation Balanced Accuracy: {bac}',
      f'Validation Macro Averaged F1 Score: {f1}',
      sep='\n')

# Run 2: a random selection of 30 bands.
print(f'\nBaseline model performance on rescaled data (ints) with {30} band random subset...')
acc, bac, f1 = baseline_supervised_pass(
    data_fpath, labels_fpath, Dataset='Foods', random_subset=True, num_random_bands=30)
print(f'Validation Accuracy: {acc}',
      f'Validation Balanced Accuracy: {bac}',
      f'Validation Macro Averaged F1 Score: {f1}',
      sep='\n')

Baseline model performance on rescaled data (ints) with all bands...
Validation Accuracy: 1.0
Validation Balanced Accuracy: 1.0
Validation Macro Averaged F1 Score: 1.0

Baseline model performance on rescaled data (ints) with 30 band random subset...
Validation Accuracy: 0.9986111111111111
Validation Balanced Accuracy: 0.9986111111111112
Validation Macro Averaged F1 Score: 0.9986110869980381
