# Setup

In [17]:
# TODO: implement variance ranking here.
# TODO: also implement variance ranking in the supervised baselines -> possibly a new notebook just for this?

In [18]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import image
import seaborn as sns
import glob
from scipy.stats.stats import pearsonr
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import normalized_mutual_info_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import mean_absolute_error, accuracy_score, f1_score, balanced_accuracy_score, mean_squared_error, r2_score

def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accepts anything, does nothing."""
    return None


import warnings

# Swap the standard warning entry point for the no-op above, so code that
# calls ``warnings.warn`` after this point produces no warning output.
warnings.warn = warn

# Seed NumPy's global random generator so the random sub-sampling is repeatable.
np.random.seed(42)


In [19]:
# Which datasets to process. These flags are checked by the `if do_...:`
# guards in the dataset sections below; a section whose guard is False is
# skipped entirely.
# NOTE(review): the original wording was "read and write", but the cells
# visible here only read data -- confirm whether anything is written.

do_indian_pines = False 
do_salient_objects = True
do_plastic_flakes = True
do_soil_moisture = False
do_foods = False


In [20]:
# Sub-sampling factors per dataset.
# plastic_flakes_sample and salient_objects_sample are multiplied by the number
# of stacked pixel rows to get the size of the random sub-sample drawn in the
# reward cells below.
# NOTE(review): indian_pines_sample, foods_sample and soil_moisture_sample do
# not appear to be referenced by the reward cells -- confirm they are used.
plastic_flakes_sample = 0.1
indian_pines_sample = 1 
salient_objects_sample = 0.01
foods_sample = 1
soil_moisture_sample = 1

In [21]:
# Number of bands to keep. It is forwarded as `num_bands_kept` to the reward
# functions, which currently ignore it and score every band of their input.
num_b_kept = 30

## Define functions

In [22]:
# reward functions

def calculate_correlations(data, num_bands_originally, num_bands_kept):
    """Correlation reward: summed |Pearson r| over all ordered band pairs / n**2.

    Every band (last axis of ``data``) is compared with every other band.
    The absolute Pearson correlations of all ordered pairs ``(i, j)`` with
    ``i != j`` are summed and divided by ``n_bands ** 2``. The diagonal is
    excluded from the sum but still counted in the denominator; this is the
    normalisation the notebook has always used and it is kept so results stay
    comparable.

    Parameters
    ----------
    data : array-like, shape (..., n_bands)
        Samples with the bands on the last axis. Arrays with more than two
        dimensions are flattened so that every leading axis is treated as a
        sample axis.
    num_bands_originally : int
        Unused; kept for backward compatibility with existing callers.
    num_bands_kept : int
        Unused; kept for backward compatibility with existing callers.

    Returns
    -------
    float
        The reward. It is ``nan`` if any pair involves a constant band, since
        the Pearson correlation is undefined there.

    Raises
    ------
    ValueError
        If ``data`` has fewer than two dimensions, has no bands, or has fewer
        than two samples while more than one band must be compared.
    """
    data = np.asarray(data)
    if data.ndim < 2 or data.shape[-1] == 0:
        raise ValueError('data must have a sample axis and at least one band on its last axis')

    n_bands = data.shape[-1]
    samples = data.reshape(-1, n_bands)
    if n_bands > 1 and samples.shape[0] < 2:
        raise ValueError('at least two samples are required to correlate bands')

    # One vectorised call yields the whole band-by-band correlation matrix,
    # replacing n_bands**2 separate pearsonr calls. ndmin=2 covers the
    # single-band case, where np.corrcoef returns a scalar.
    corr = np.array(np.corrcoef(samples, rowvar=False), ndmin=2)

    # Self-correlations are not part of the reward (and would be nan for a
    # constant band), so drop the diagonal before summing.
    np.fill_diagonal(corr, 0.0)

    return np.abs(corr).sum() / (n_bands ** 2)


def calculate_mutual_infos(data, num_bands_originally, num_bands_kept):
    """Mutual-information reward: summed NMI over all ordered band pairs / n**2.

    Every band (last axis of ``data``) is compared with every other band using
    ``normalized_mutual_info_score``. The scores of all ordered pairs
    ``(i, j)`` with ``i != j`` are summed and divided by ``n_bands ** 2``. The
    diagonal is excluded from the sum but still counted in the denominator;
    this is the normalisation the notebook has always used and it is kept so
    results stay comparable.

    Parameters
    ----------
    data : array-like, shape (..., n_bands)
        Samples with the bands on the last axis. Arrays with more than two
        dimensions are flattened so that every leading axis is treated as a
        sample axis.
    num_bands_originally : int
        Unused; kept for backward compatibility with existing callers.
    num_bands_kept : int
        Unused; kept for backward compatibility with existing callers.

    Returns
    -------
    float
        The reward.

    Raises
    ------
    ValueError
        If ``data`` has fewer than two dimensions or has no bands.
    """
    data = np.asarray(data)
    if data.ndim < 2 or data.shape[-1] == 0:
        raise ValueError('data must have a sample axis and at least one band on its last axis')

    n_bands = data.shape[-1]
    samples = data.reshape(-1, n_bands)

    # The normalized mutual information score is symmetric in its two
    # arguments, so each unordered pair is evaluated once and counted twice
    # below. This halves the number of (expensive) score computations.
    pair_sum = 0.0
    for i in range(n_bands):
        for j in range(i + 1, n_bands):
            pair_sum += normalized_mutual_info_score(samples[:, i], samples[:, j])

    return 2 * pair_sum / (n_bands ** 2)


In [23]:
## dataset loading

def load_datasets(Dataset):
    """Load the hyperspectral imagery and ground-truth labels of one dataset.

    Files are read from ``../data/<folder>/hyperspectral_imagery/*npy`` and
    ``../data/<folder>/gt_labels/*npy``.

    Parameters
    ----------
    Dataset : str
        One of ``'SM'``, ``'IN'``, ``'SO'``, ``'PF'`` or ``'Foods'``.

    Returns
    -------
    (hyper, gt)
        For ``'SM'`` and ``'IN'``: the arrays loaded from the first imagery
        file and the first label file (in sorted order).
        For ``'SO'``, ``'PF'`` and ``'Foods'``: two lists holding the arrays
        of every imagery file and every label file, in sorted file order.

    Raises
    ------
    ValueError
        If ``Dataset`` is not a known key, or if a multi-file dataset has a
        different number of imagery and label files.
    FileNotFoundError
        If no imagery or no label file matches the expected path pattern.
    """
    # dataset key -> (folder name under ../data, load every matching file?)
    dataset_folders = {
        'SM': ('soil_moisture', False),
        'IN': ('indian_pines', False),
        'SO': ('salient_objects', True),
        'PF': ('plastic_flakes', True),
        'Foods': ('foods', True),
    }
    if Dataset not in dataset_folders:
        raise ValueError(
            'Unknown dataset %r; expected one of %s' % (Dataset, sorted(dataset_folders)))

    folder, load_all = dataset_folders[Dataset]
    hyper_path = '../data/' + folder + '/hyperspectral_imagery/*npy'
    gt_path = '../data/' + folder + '/gt_labels/*npy'

    # glob returns matches in arbitrary order. Globbing once and sorting both
    # lists makes the result deterministic and pairs image i with label i.
    # NOTE(review): this assumes imagery and label file names sort in the same
    # order (e.g. identical names in both folders) -- confirm for the data.
    hyper_files = sorted(glob.glob(hyper_path))
    gt_files = sorted(glob.glob(gt_path))

    if not hyper_files:
        raise FileNotFoundError('No imagery files match ' + hyper_path)
    if not gt_files:
        raise FileNotFoundError('No label files match ' + gt_path)

    if not load_all:
        return np.load(hyper_files[0]), np.load(gt_files[0])

    if len(hyper_files) != len(gt_files):
        raise ValueError(
            'Found %d imagery files but %d label files for dataset %r'
            % (len(hyper_files), len(gt_files), Dataset))

    hypers = [np.load(path) for path in hyper_files]
    gt_labels = [np.load(path) for path in gt_files]
    return hypers, gt_labels

## Plastic flakes dataset

In [24]:
# stacks all images vertically

# load data

if do_plastic_flakes:

    # load_datasets('PF') returns one list of image arrays and one list of
    # label arrays; np.array stacks each list along a new leading axis.
    hyper, gt = load_datasets('PF')
    hyper, gt = np.array(hyper), np.array(gt)

    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)

    # Stack all images vertically: (n_images, n_pixels, n_bands) becomes
    # (n_images * n_pixels, n_bands). A C-order reshape puts image i in rows
    # i*n_pixels .. (i+1)*n_pixels, exactly where the former copy loop put it.
    # astype(np.float64) keeps the dtype of the buffers the loop used to fill.
    hyper_multiple = hyper.reshape(-1, hyper.shape[-1]).astype(np.float64)
    gt_multiple = gt.reshape(-1).astype(np.float64)

    print('The shape of the vertically stacked images:', hyper_multiple.shape)
    # This line reports the label array, so it is labelled accordingly.
    print('The shape of the vertically stacked labels:', gt_multiple.shape)



Dataset info...
The shape of the original imagery: (11, 112128, 224)
The shape of the original labels: (11, 112128)
The shape of the vertically stacked images: (1233408, 224)
The shape of the vertically stacked images: (1233408,)


In [25]:
# rewards
    
if do_plastic_flakes:

    # Draw a random subset of the stacked pixel rows. The subset size is the
    # row count times `plastic_flakes_sample`; np.random.randint draws with
    # replacement, so a row can be picked more than once.
    # NOTE: hyper_multiple is overwritten here (re-running this cell samples
    # the sample again) and gt_multiple is not sub-sampled alongside it.
    total_rows = hyper_multiple.shape[0]
    indices = np.random.randint(0, total_rows, int(total_rows * plastic_flakes_sample))
    hyper_multiple = hyper_multiple[indices, :]
    print('The shape of the sub-sampled vertically stacked images:', hyper_multiple.shape)

    band_count = hyper_multiple.shape[-1]

    # Correlation reward (a single run, kept in a list).
    correlations = [
        calculate_correlations(hyper_multiple,
                               num_bands_originally=band_count,
                               num_bands_kept=num_b_kept)
    ]
    print(correlations)
    print(f'\nCorrelation reward', np.mean(correlations))

    # Normalized mutual information reward (a single run, kept in a list).
    mis = [
        calculate_mutual_infos(hyper_multiple,
                               num_bands_originally=band_count,
                               num_bands_kept=num_b_kept)
    ]
    print(mis)
    print(f'Normalized mutual information reward', np.mean(mis))
    


The shape of the sub-sampled vertically stacked images: (123340, 224)
[188 192 193 197 198 199 200 201 202 196 204 205 206 207 208 209 210 211
 212 213 214 215 216 217 218 219 195 194 187 203]
[0.9280932501728089]

Correlation reward 0.9280932501728089
[188 192 193 197 198 199 200 201 202 196 204 205 206 207 208 209 210 211
 212 213 214 215 216 217 218 219 195 194 187 203]
[0.5756094262873841]
Normalized mutual information reward 0.5756094262873841


## Salient objects dataset

In [26]:
# stacks all images vertically

# load data

if do_salient_objects:

    # load_datasets('SO') returns one list of image arrays and one list of
    # label arrays; np.array stacks each list along a new leading axis.
    hyper, gt = load_datasets('SO')
    hyper, gt = np.array(hyper), np.array(gt)

    print('\nDataset info...')
    print('The shape of the original imagery:', hyper.shape)
    print('The shape of the original labels:', gt.shape)

    # Stack all images vertically: (n_images, n_pixels, n_bands) becomes
    # (n_images * n_pixels, n_bands). A C-order reshape puts image i in rows
    # i*n_pixels .. (i+1)*n_pixels, exactly where the former copy loop put it.
    # astype(np.float64) keeps the dtype of the buffers the loop used to fill.
    hyper_multiple = hyper.reshape(-1, hyper.shape[-1]).astype(np.float64)
    gt_multiple = gt.reshape(-1).astype(np.float64)

    print('\nDataset info...')
    print('The shape of the vertically stacked images:', hyper_multiple.shape)
    # This line reports the label array, so it is labelled accordingly.
    print('The shape of the vertically stacked labels:', gt_multiple.shape)



Dataset info...
The shape of the original imagery: (60, 786432, 81)
The shape of the original labels: (60, 786432)

Dataset info...
The shape of the vertically stacked images: (47185920, 81)
The shape of the vertically stacked images: (47185920,)


In [27]:
# rewards
    
if do_salient_objects:

    # Draw a random subset of the stacked pixel rows. The subset size is the
    # row count times `salient_objects_sample`; np.random.randint draws with
    # replacement, so a row can be picked more than once.
    # NOTE: hyper_multiple is overwritten here (re-running this cell samples
    # the sample again) and gt_multiple is not sub-sampled alongside it.
    total_rows = hyper_multiple.shape[0]
    indices = np.random.randint(0, total_rows, int(total_rows * salient_objects_sample))
    hyper_multiple = hyper_multiple[indices, :]
    print('The shape of the sub-sampled vertically stacked images:', hyper_multiple.shape)

    band_count = hyper_multiple.shape[-1]

    # Correlation reward (a single run, kept in a list).
    correlations = [
        calculate_correlations(hyper_multiple,
                               num_bands_originally=band_count,
                               num_bands_kept=num_b_kept)
    ]
    print(f'\nCorrelation reward', np.mean(correlations))

    # Normalized mutual information reward (a single run, kept in a list).
    mis = [
        calculate_mutual_infos(hyper_multiple,
                               num_bands_originally=band_count,
                               num_bands_kept=num_b_kept)
    ]
    print(f'Normalized mutual information reward', np.mean(mis))
    
    

The shape of the sub-sampled vertically stacked images: (471859, 81)
[50 28 40 49 48 47 46 45 44 43 42 41 39 38 37 36 35 34 33 32 71 31 30 29
 27 76 23 22 21 80]

Correlation reward 0.12688449348487058
[50 28 40 49 48 47 46 45 44 43 42 41 39 38 37 36 35 34 33 32 71 31 30 29
 27 76 23 22 21 80]
Normalized mutual information reward 0.1394126638935471


## Indian Pines dataset

In [28]:
# load data

if do_indian_pines:

    # Load the imagery array and its label array for the 'IN' dataset key.
    hyper, gt = load_datasets('IN')

    # Report the array shapes as loaded.
    imagery_shape, labels_shape = hyper.shape, gt.shape
    print('\nDataset info...')
    print('The shape of the original imagery:', imagery_shape)
    print('The shape of the original labels:', labels_shape)
    

In [29]:
# rewards
    
if do_indian_pines:
    # Not used by this cell (the repeated-run loops are disabled).
    num_runs = 50

    band_count = hyper.shape[-1]

    # Correlation reward (a single run, kept in a list).
    correlations = [
        calculate_correlations(hyper,
                               num_bands_originally=band_count,
                               num_bands_kept=num_b_kept)
    ]
    print(f'\nCorrelation reward', np.mean(correlations))

    # Normalized mutual information reward (a single run, kept in a list).
    mis = [
        calculate_mutual_infos(hyper,
                               num_bands_originally=band_count,
                               num_bands_kept=num_b_kept)
    ]
    print(f'Normalized mutual information reward', np.mean(mis))
    

## Soil moisture dataset

In [30]:
# load data

if do_soil_moisture:

    # Load the imagery array and its label array for the 'SM' dataset key.
    hyper, gt = load_datasets('SM')

    # Report the array shapes as loaded.
    imagery_shape, labels_shape = hyper.shape, gt.shape
    print('\nDataset info...')
    print('The shape of the original imagery:', imagery_shape)
    print('The shape of the original labels:', labels_shape)
    

In [31]:
# rewards
    
if do_soil_moisture:
    # Not used by this cell (the repeated-run loops are disabled).
    num_runs = 50

    band_count = hyper.shape[-1]

    # Correlation reward (a single run, kept in a list).
    correlations = [
        calculate_correlations(hyper,
                               num_bands_originally=band_count,
                               num_bands_kept=num_b_kept)
    ]
    print(f'\nCorrelation reward', np.mean(correlations))

    # Normalized mutual information reward (a single run, kept in a list).
    mis = [
        calculate_mutual_infos(hyper,
                               num_bands_originally=band_count,
                               num_bands_kept=num_b_kept)
    ]
    print(f'Normalized mutual information reward', np.mean(mis))


## Foods dataset


In [32]:
# load data

if do_foods:

    # load_datasets('Foods') returns a list of images and a list of labels.
    hyper, gt = load_datasets('Foods')

    # Only the first image and its label array are kept.
    hyper, gt = hyper[0], gt[0]

    # Report the array shapes of the selected image and labels.
    imagery_shape, labels_shape = hyper.shape, gt.shape
    print('\nDataset info...')
    print('The shape of the original imagery:', imagery_shape)
    print('The shape of the original labels:', labels_shape)
    
# rewards
    
# Guarded by do_foods: this is the Foods section and `hyper` is the Foods
# image loaded just above. The guard used to be do_indian_pines (copy-paste
# slip), so the Foods rewards never ran when only do_foods was enabled.
if do_foods:
    # Not used by this cell (the repeated-run loops are disabled).
    num_runs = 50

    # Correlation reward (a single run, kept in a list).
    correlations = []
    correlations.append(calculate_correlations(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=num_b_kept))
    print(f'\nCorrelation reward', np.mean(correlations))

    # Normalized mutual information reward (a single run, kept in a list).
    mis = []
    mis.append(calculate_mutual_infos(hyper, num_bands_originally=hyper.shape[-1], num_bands_kept=num_b_kept))
    print(f'Normalized mutual information reward', np.mean(mis))
    
