# Setup

In [16]:
# TODO: implement variance ranking here, and
# implement variance ranking in the supervised baselines as well -> maybe a new notebook just for this?

In [17]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import image
import seaborn as sns
import glob
from scipy.stats.stats import pearsonr
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import normalized_mutual_info_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import mean_absolute_error, accuracy_score, f1_score, balanced_accuracy_score, mean_squared_error, r2_score

import warnings


def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accepts any arguments, returns ``None``."""
    return None


# Rebind ``warnings.warn`` to the no-op above, so code that looks the function
# up on the ``warnings`` module from here on emits nothing.
warnings.warn = warn

# Seed NumPy's global random generator.
np.random.seed(42)


In [18]:
# Dataset switches.
# These flags are tested by the per-dataset cells below to decide whether a
# dataset is loaded and whether its rewards are computed.

do_indian_pines = True 
do_salient_objects = False
do_plastic_flakes = False
do_soil_moisture = True
do_foods = True


In [19]:
# Value passed as `num_bands_kept` to the reward functions in the cells below.
# NOTE: neither reward function currently reads that argument.
num_b_kept = 30

## Define functions

In [20]:
# reward functions

def calculate_correlations(data, num_bands_originally, num_bands_kept):
    """Return the mean absolute Pearson correlation between bands of ``data``.

    Every band (column ``data[:, i]``, with ``data.shape[-1]`` bands in total)
    is compared with every other band. The absolute correlation coefficients
    of all ordered pairs ``(i, j)`` with ``i != j`` are summed and divided by
    ``num_bands ** 2``.

    Parameters
    ----------
    data : array-like
        Indexed as ``data[:, i]``; expected to be 2-D with one column per
        band (e.g. ``(pixels, bands)``) -- TODO confirm for the stacked
        multi-image datasets.
    num_bands_originally : int
        Not used; kept so existing calls keep working.
    num_bands_kept : int
        Not used; kept so existing calls keep working.

    Returns
    -------
    float
        ``sum_{i != j} |r(i, j)| / num_bands ** 2``.

    Raises
    ------
    ZeroDivisionError
        If ``data`` has no bands.
    """
    num_bands = data.shape[-1]

    # |r(i, j)| equals |r(j, i)|, so each unordered pair is evaluated once and
    # counted twice below instead of calling pearsonr for both orderings.
    pair_sum = 0.0
    for i in range(num_bands):
        for j in range(i + 1, num_bands):
            pair_sum += np.abs(pearsonr(data[:, i], data[:, j])[0])

    # The denominator is the full num_bands x num_bands grid (diagonal
    # included) even though self-pairs contribute nothing to the sum; this is
    # kept so values stay comparable with previously reported rewards.
    return 2 * pair_sum / num_bands**2


def calculate_mutual_infos(data, num_bands_originally, num_bands_kept):
    """Return the mean normalized mutual information between bands of ``data``.

    Every band (column ``data[:, i]``, with ``data.shape[-1]`` bands in total)
    is compared with every other band using
    ``sklearn.metrics.normalized_mutual_info_score``. The scores of all
    ordered pairs ``(i, j)`` with ``i != j`` are summed and divided by
    ``num_bands ** 2``.

    Parameters
    ----------
    data : array-like
        Indexed as ``data[:, i]``; expected to be 2-D with one column per
        band (e.g. ``(pixels, bands)``) -- TODO confirm for the stacked
        multi-image datasets.
    num_bands_originally : int
        Not used; kept so existing calls keep working.
    num_bands_kept : int
        Not used; kept so existing calls keep working.

    Returns
    -------
    float
        ``sum_{i != j} NMI(i, j) / num_bands ** 2``.

    Raises
    ------
    ZeroDivisionError
        If ``data`` has no bands.
    """
    # NOTE(review): normalized_mutual_info_score compares two labelings, so
    # each distinct band value presumably acts as its own label -- confirm
    # this is intended for continuous reflectance values.
    num_bands = data.shape[-1]

    # The score is symmetric in its two arguments, so each unordered pair is
    # evaluated once and counted twice below.
    pair_sum = 0.0
    for i in range(num_bands):
        for j in range(i + 1, num_bands):
            pair_sum += normalized_mutual_info_score(data[:, i], data[:, j])

    # The denominator is the full num_bands x num_bands grid (diagonal
    # included) even though self-pairs are excluded from the sum; this is kept
    # so values stay comparable with previously reported rewards.
    return 2 * pair_sum / num_bands**2


In [21]:
## dataset loading

def load_datasets(Dataset):
    """Load hyperspectral imagery and ground-truth labels for one dataset.

    Parameters
    ----------
    Dataset : str
        Dataset key: ``'SM'``, ``'IN'``, ``'SO'``, ``'PF'`` or ``'Foods'``.

    Returns
    -------
    tuple
        For ``'SM'`` and ``'IN'``: ``(hyper, gt)``, the arrays loaded from the
        first matching imagery file and the first matching label file.
        For ``'SO'``, ``'PF'`` and ``'Foods'``: ``(hypers, gt_labels)``, two
        lists with one array per matching imagery file and the label array at
        the same position.

    Raises
    ------
    ValueError
        If ``Dataset`` is not a known key, or a multi-file dataset has fewer
        label files than imagery files.
    FileNotFoundError
        If a single-file dataset ('SM', 'IN') has no matching imagery or
        label file.
    """
    # key -> (imagery glob pattern, label glob pattern, load every match?)
    sources = {
        'SM': ('../data/soil_moisture/hyperspectral_imagery/*npy',
               '../data/soil_moisture/gt_labels/*npy',
               False),
        'IN': ('../data/indian_pines/hyperspectral_imagery/*npy',
               '../data/indian_pines/gt_labels/*npy',
               False),
        'SO': ('../data/salient_objects/hyperspectral_imagery/salient-object-data.npy',
               '../data/salient_objects/gt_labels/*npy',
               True),
        'PF': ('../data/plastic_flakes/hyperspectral_imagery/plastic-flake-data.npy',
               '../data/plastic_flakes/gt_labels/*npy',
               True),
        'Foods': ('../data/foods/hyperspectral_imagery/*npy',
                  '../data/foods/gt_labels/*npy',
                  True),
    }
    if Dataset not in sources:
        raise ValueError(
            f'Unknown dataset {Dataset!r}; expected one of {sorted(sources)}')
    hyper_pattern, gt_pattern, load_all = sources[Dataset]

    # glob returns matches in arbitrary order. Listing each directory once and
    # sorting makes "the first file" and the imagery/label pairing repeatable.
    # Assumes imagery and label files sort into the same order (e.g. shared
    # base names) -- TODO confirm against the data directory.
    hyper_files = sorted(glob.glob(hyper_pattern))
    gt_files = sorted(glob.glob(gt_pattern))

    if not load_all:
        if not hyper_files:
            raise FileNotFoundError(f'No imagery file matches {hyper_pattern!r}')
        if not gt_files:
            raise FileNotFoundError(f'No label file matches {gt_pattern!r}')
        return np.load(hyper_files[0]), np.load(gt_files[0])

    if len(gt_files) < len(hyper_files):
        raise ValueError(
            f'{len(hyper_files)} imagery files but only {len(gt_files)} label '
            f'files for dataset {Dataset!r}')

    hypers = [np.load(path) for path in hyper_files]
    # Only as many label files as there are imagery files are loaded.
    gt_labels = [np.load(path) for path in gt_files[:len(hyper_files)]]
    return hypers, gt_labels

## Plastic flakes dataset

In [22]:
# Load the plastic flakes data, turn the per-file lists into arrays and drop
# size-1 axes from the imagery.

if do_plastic_flakes:
    hyper, gt = map(np.array, load_datasets('PF'))
    hyper_multiple = hyper.squeeze()

    # Shapes of what was loaded, as a quick sanity check.
    print('\nDataset info...')
    print('The shape of the original imagery:', hyper_multiple.shape)
    print('The shape of the original labels:', gt.shape)
     


In [23]:
# Rewards for the plastic flakes imagery.

if do_plastic_flakes:
    # TODO: an earlier note planned to randomly sample 5% of the pixels of
    # hyper_multiple here; no sampling is done, the full array is used.
    correlations = [
        calculate_correlations(
            hyper_multiple,
            num_bands_originally=hyper_multiple.shape[-1],
            num_bands_kept=num_b_kept,
        )
    ]
    print(correlations)
    print('\nCorrelation reward', np.mean(correlations))

    mis = [
        calculate_mutual_infos(
            hyper_multiple,
            num_bands_originally=hyper_multiple.shape[-1],
            num_bands_kept=num_b_kept,
        )
    ]
    print(mis)
    print('Normalized mutual information reward', np.mean(mis))
    


## Salient objects dataset

In [24]:
# Load the salient objects data, turn the per-file lists into arrays and drop
# size-1 axes from the imagery.

if do_salient_objects:

    hyper, gt = load_datasets('SO')

    hyper, gt = np.array(hyper), np.array(gt)

    # Fixed: this previously called the non-existent `np.sqeeze`, which raised
    # AttributeError as soon as the salient objects dataset was enabled.
    hyper_multiple = np.squeeze(hyper)

    # Shapes of what was loaded, as a quick sanity check.
    print('\nDataset info...')
    print('The shape of the original imagery:', hyper_multiple.shape)
    print('The shape of the original labels:', gt.shape)
    


In [25]:
# Rewards for the salient objects imagery.

if do_salient_objects:
    # Each reward is computed once; the single-element lists keep the
    # np.mean(...) reporting below unchanged.
    correlations = [
        calculate_correlations(
            hyper_multiple,
            num_bands_originally=hyper_multiple.shape[-1],
            num_bands_kept=num_b_kept,
        )
    ]
    print('\nCorrelation reward', np.mean(correlations))

    mis = [
        calculate_mutual_infos(
            hyper_multiple,
            num_bands_originally=hyper_multiple.shape[-1],
            num_bands_kept=num_b_kept,
        )
    ]
    print('Normalized mutual information reward', np.mean(mis))
    
    

## Indian Pines dataset

In [26]:
# Load the Indian Pines imagery and labels (only when the dataset is enabled).

if do_indian_pines:
    hyper, gt = load_datasets('IN')

    # Shapes of what was loaded, as a quick sanity check.
    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)
    


Dataset info...
The shape of the original imagery: (10249, 200)
The shape of the original labels: (10249,)


In [27]:
# Rewards for the Indian Pines imagery.

if do_indian_pines:
    # Not read anywhere in this cell; presumably meant for repeated runs.
    num_runs = 50

    # Each reward is computed once; the single-element lists keep the
    # np.mean(...) reporting below unchanged.
    correlations = [
        calculate_correlations(
            hyper,
            num_bands_originally=hyper.shape[-1],
            num_bands_kept=num_b_kept,
        )
    ]
    print('\nCorrelation reward', np.mean(correlations))

    mis = [
        calculate_mutual_infos(
            hyper,
            num_bands_originally=hyper.shape[-1],
            num_bands_kept=num_b_kept,
        )
    ]
    print('Normalized mutual information reward', np.mean(mis))
    


Correlation reward 0.5566290338374974
Normalized mutual information reward 0.26176094587599075


## Soil moisture dataset

In [28]:
# Load the soil moisture imagery and labels (only when the dataset is enabled).

if do_soil_moisture:
    hyper, gt = load_datasets('SM')

    # Shapes of what was loaded, as a quick sanity check.
    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)
    


Dataset info...
The shape of the original imagery: (679, 125)
The shape of the original labels: (679,)


In [29]:
# Rewards for the soil moisture imagery.

if do_soil_moisture:
    # Not read anywhere in this cell; presumably meant for repeated runs.
    num_runs = 50

    # Each reward is computed once; the single-element lists keep the
    # np.mean(...) reporting below unchanged.
    correlations = [
        calculate_correlations(
            hyper,
            num_bands_originally=hyper.shape[-1],
            num_bands_kept=num_b_kept,
        )
    ]
    print('\nCorrelation reward', np.mean(correlations))

    mis = [
        calculate_mutual_infos(
            hyper,
            num_bands_originally=hyper.shape[-1],
            num_bands_kept=num_b_kept,
        )
    ]
    print('Normalized mutual information reward', np.mean(mis))



Correlation reward 0.9784309489204578
Normalized mutual information reward 0.6573439144709141


## Foods dataset


In [30]:
# Load the Foods data (only when the dataset is enabled).

if do_foods:
    # load_datasets returns one list of imagery arrays and one list of label
    # arrays for 'Foods'; only the first entry of each list is used here.
    hyper, gt = (items[0] for items in load_datasets('Foods'))

    # Shapes of what was loaded, as a quick sanity check.
    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)
    
# Rewards for the Foods imagery.

# Fixed: this block was gated on `do_indian_pines`. It scores the `hyper`
# array loaded by the Foods block directly above, so it must follow the Foods
# switch. Otherwise it is skipped, or run on another dataset's `hyper`,
# whenever the two flags differ.
if do_foods:
    # Not read anywhere in this cell; presumably meant for repeated runs.
    num_runs = 50

    # Each reward is computed once; the single-element lists keep the
    # np.mean(...) reporting below unchanged.
    correlations = [
        calculate_correlations(
            hyper,
            num_bands_originally=hyper.shape[-1],
            num_bands_kept=num_b_kept,
        )
    ]
    print('\nCorrelation reward', np.mean(correlations))

    mis = [
        calculate_mutual_infos(
            hyper,
            num_bands_originally=hyper.shape[-1],
            num_bands_kept=num_b_kept,
        )
    ]
    print('Normalized mutual information reward', np.mean(mis))
    



Dataset info...
The shape of the original imagery: (2400, 96)
The shape of the original labels: (2400,)

Correlation reward 0.5940902419265641
Normalized mutual information reward 0.6066130724567937
