## Set up the Google Colab environment

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# The google.colab package is only importable when running on Google Colab.
from google.colab import drive 
drive.mount('/content/drive')

In [2]:
import os
# Switch to the project's code folder on the mounted Drive so that the relative
# file names and the local module import used in later cells resolve from there.
os.chdir('/content/drive/My Drive/Hyperspectral_Image_Classification/code')

In [1]:
from source_model_utils_indian_pines import *
import scipy.io as sio

## Load Indian Pines Dataset

In [2]:
# Read the two MATLAB files (paths are relative to the current working directory).
# Each result is indexed by MATLAB variable name in the next cell.
uIndianPines = sio.loadmat('Indian_pines_corrected.mat')
gt_IndianPines = sio.loadmat('Indian_pines_gt.mat')

In [3]:
# Pull the image cube and the per-pixel label map out of the loaded files.
data = uIndianPines['indian_pines_corrected']
ground_truth = gt_IndianPines['indian_pines_gt']

In [4]:
# Dimensions of the image cube; the recorded output is (145, 145, 200).
data.shape

(145, 145, 200)

In [5]:
# Dimensions of the label map; the recorded output is (145, 145), matching the
# first two axes of `data`.
ground_truth.shape

(145, 145)

## Distribution of samples for each class

In [6]:
# np.unique(..., return_counts=True) yields (labels, counts); wrapping the pair in a
# DataFrame gives two rows, so transpose to get one row per label.
class_distribution = pd.DataFrame(np.unique(ground_truth, return_counts = True))
class_distribution = class_distribution.transpose()
class_distribution.columns = ['class','samples']
# Display the label / pixel-count table.
class_distribution

Unnamed: 0,class,samples
0,0,10776
1,1,46
2,2,1428
3,3,830
4,4,237
5,5,483
6,6,730
7,7,28
8,8,478
9,9,20


In [8]:
classes , counts = np.unique(ground_truth, return_counts = True)
# NOTE: the list below indexes `classes` by POSITION, not by label value. The recorded
# output shows the selected labels equal the positions (2, 3, 5, ...), which holds only
# while the sorted unique labels are the consecutive integers starting at 0.
classes = classes[[2,3,5,6,8,10,11,12,14]] ## Dropping classes with small number of samples
classes

array([ 2,  3,  5,  6,  8, 10, 11, 12, 14], dtype=uint8)

In [9]:
## Pick samples (cubes) belonging to a single class

def pick_samples_from_class(Class, cube_size, data, ground_truth, cubes, output_class, overlap_ratio, channels):
    """Extract spatial cubes centred on the pixels of one class.

    Pixels labelled ``Class`` in ``ground_truth`` are visited in the order returned
    by ``np.where``. Pixels too close to the image border are skipped. A pixel is
    accepted when it is the first candidate, or when its Euclidean distance to every
    previously accepted centre is strictly greater than
    ``int(cube_size * (1 - overlap_ratio))``.

    Parameters
    ----------
    Class : label value to look for in ``ground_truth``.
    cube_size : nominal side length of the cube. The slice taken is
        ``centre - int(cube_size / 2) : centre + int(cube_size / 2)``, so an odd
        ``cube_size`` produces cubes with side ``cube_size - 1``.
    data : array indexed as ``data[row, col, channel]``.
    ground_truth : 2-D label map aligned with the first two axes of ``data``.
    cubes : list that accepted cubes are appended to (mutated in place).
    output_class : list that one ``Class`` entry per accepted cube is appended to
        (mutated in place).
    overlap_ratio : value used in the distance threshold above; ``1`` gives a
        threshold of 0, so every interior pixel is accepted.
    channels : number of leading channels kept from ``data``.

    Returns
    -------
    (cubes, output_class, extracted_cubes), where ``extracted_cubes`` is the list of
    ``[row, col]`` centres accepted for this class. When the class has no pixel far
    enough from the border, the two input lists are returned unchanged together with
    an empty ``extracted_cubes`` (the previous implementation raised ``IndexError``).
    """
    half = int(cube_size / 2)
    margin = np.ceil(cube_size / 2)
    n_rows = len(ground_truth)
    n_cols = len(ground_truth[0])

    ## Get row and column position from ground truth image for class
    class_indices = np.where(ground_truth == Class)
    rows, cols = class_indices[0], class_indices[1]

    ## Remove border position class samples
    class_cube_positions = [[row, col] for row, col in zip(rows, cols)
                            if n_rows - margin > row > margin
                            and n_cols - margin > col > margin]

    ## Minimum centre-to-centre distance that a new cube must exceed
    min_distance = int(cube_size * (1 - overlap_ratio))

    extracted_cubes = []

    for row, col in class_cube_positions:

        ## The first candidate is always taken; later ones must be far enough from
        ## every accepted centre. all() stops at the first centre that is too close,
        ## which is equivalent to comparing the minimum distance with the threshold.
        if extracted_cubes and not all(
                math.sqrt((row - kept_row) ** 2 + (col - kept_col) ** 2) > min_distance
                for kept_row, kept_col in extracted_cubes):
            continue

        cubes.append(np.array(data[row - half:row + half,
                                   col - half:col + half,
                                   :channels]))

        ## Output class value
        output_class.append(Class)
        extracted_cubes.append([row, col])

    return cubes, output_class, extracted_cubes

## Collect and combine samples from all classes

def collect_samples_from_all_classes(classes, cube_size, data, ground_truth, cubes, output_class, overlap_ratio, channels):
    """Run ``pick_samples_from_class`` for every label and stack the results.

    ``cubes`` and ``output_class`` are threaded through each call, so the cubes and
    labels of all classes accumulate in the order of ``classes``. The number of
    centres accepted per class is recorded and printed.

    Returns
    -------
    (cubes, output_class, class_samples): the accumulated cubes and labels converted
    with ``np.array``, and the list of per-class accepted counts (same order as
    ``classes``).
    """
    per_class_counts = []

    for label in classes:
        picked = pick_samples_from_class(label, cube_size, data, ground_truth,
                                         cubes, output_class, overlap_ratio, channels)
        cubes, output_class, accepted_centres = picked
        per_class_counts.append(len(accepted_centres))

    stacked_cubes = np.array(cubes)
    stacked_labels = np.array(output_class)

    print('Class Samples : ', per_class_counts)

    return stacked_cubes, stacked_labels, per_class_counts

## Prepare Training, Validation & Test Data

def training_and_test_set(training_samples_from_each_class, 
                          class_samples, cubes, output_class):
    """Split cubes into shuffled, one-hot-labelled training and test sets.

    For every distinct label in ``output_class`` (ascending order) the first
    ``training_samples_from_each_class`` cubes go to the training set and the
    remaining ones to the test set. Both sets are then shuffled independently with
    ``np.random.shuffle`` and the labels are one-hot encoded.

    Changes from the previous version:
    * the label collection was built from the sample arrays, so ``y_train`` and
      ``y_test`` contained cubes instead of labels; labels are now taken from
      ``output_class``;
    * the class ids are derived from ``output_class`` instead of being hard-coded
      (for labels 2, 3, 5, 6, 8, 10, 11, 12, 14 the order is the same as before);
    * one-hot columns are defined once from all labels in ``output_class`` and used
      for both sets, so ``y_train`` and ``y_test`` always have the same column
      layout even when a class has no test samples. The encoding is done with
      NumPy and yields float arrays of 0.0 / 1.0.

    Parameters
    ----------
    training_samples_from_each_class : number of leading cubes per class that go to
        the training set.
    class_samples : per-class sample counts; only printed and returned unchanged.
    cubes : array of cubes, first axis aligned with ``output_class``.
    output_class : 1-D array of integer labels, one per cube.

    Returns
    -------
    (X_train, X_test, y_train, y_test, counts_test_set, class_samples), where
    ``counts_test_set`` holds the per-class counts of the labels present in the
    test set.

    Raises
    ------
    ValueError : if ``output_class`` is empty.
    """
    cubes = np.asarray(cubes)
    output_class = np.asarray(output_class)

    # Sorted distinct labels; their position defines the one-hot column.
    class_ids = np.unique(output_class)
    if class_ids.size == 0:
        raise ValueError("output_class is empty: there are no samples to split")

    # Training & Test Set Arrays
    X_train = []
    X_test = []

    y_train = []
    y_test = []

    # Get Training set size samples (and the matching labels) from each class
    for class_id in class_ids:
        member_rows = np.where(output_class == class_id)[0]
        class_cubes = cubes[member_rows]
        class_labels = output_class[member_rows]

        X_train.append(class_cubes[0:training_samples_from_each_class])
        X_test.append(class_cubes[training_samples_from_each_class:])

        y_train.append(class_labels[0:training_samples_from_each_class])
        y_test.append(class_labels[training_samples_from_each_class:])

    X_train = np.concatenate(X_train, axis = 0)
    X_test = np.concatenate(X_test, axis = 0)

    y_train = np.concatenate(y_train, axis = 0)
    y_test = np.concatenate(y_test, axis = 0)

    ## Shuffle Training Set (same permutation for cubes and labels)
    samples_train = np.arange(X_train.shape[0])
    np.random.shuffle(samples_train)

    X_train = X_train[samples_train]
    y_train = y_train[samples_train]

    ## Shuffle Test Set (same permutation for cubes and labels)
    samples_test = np.arange(X_test.shape[0])
    np.random.shuffle(samples_test)

    X_test = X_test[samples_test]
    y_test = y_test[samples_test]

    # Get counts(samples) of each class in test set
    values_test_set, counts_test_set = np.unique(y_test, return_counts = True)
    values_training_set, counts_training_set = np.unique(y_train, return_counts = True)

    print("Samples per class: " + str(class_samples) + '\n'
          "Total number of samples is " + str(np.sum(class_samples)) + '.\n')
    
    print("unique classes in training set: " + str(values_training_set) + '\n'
          "Total number of samples in training set is " + str(np.sum(counts_training_set)) + '.\n'
          "Samples per class in training set: " + str(counts_training_set) + '\n')

    print("unique classes in test set: " + str(values_test_set) + '\n'
          "Total number of samples in test set is " + str(np.sum(counts_test_set)) + '.\n'
          "Samples per class in test set: " + str(counts_test_set) + '\n')
    print('\n')

    ## one hot encode labels: row k of the identity matrix encodes class_ids[k]
    identity = np.eye(len(class_ids))
    y_train = identity[np.searchsorted(class_ids, y_train)]
    y_test = identity[np.searchsorted(class_ids, y_test)]

    return X_train, X_test, y_train, y_test, counts_test_set, class_samples


def sample_extraction(classes, cube_size, data, ground_truth, cubes, output_class, training_samples_from_each_class,
                      overlap_ratio, channels):
    """Extract cubes for all classes, then split them into training and test sets.

    Chains ``collect_samples_from_all_classes`` and ``training_and_test_set`` and
    returns the latter's result unchanged:
    ``(X_train, X_test, y_train, y_test, counts_test_set, class_samples)``.
    """
    collected = collect_samples_from_all_classes(classes, cube_size, data, ground_truth,
                                                 cubes, output_class, overlap_ratio, channels)
    all_cubes, all_labels, per_class_counts = collected

    split = training_and_test_set(training_samples_from_each_class,
                                  per_class_counts,
                                  all_cubes,
                                  all_labels)
    X_train, X_test, y_train, y_test, counts_test_set, class_samples = split

    return X_train, X_test, y_train, y_test, counts_test_set, class_samples


In [10]:
# Extract cubes for every selected class. With overlap_ratio = 1 the distance threshold
# used by pick_samples_from_class is int(20 * (1 - 1)) = 0, so every labelled pixel
# that is far enough from the border yields a cube. Only the first 64 channels are kept.
cubes, output_class, class_samples = collect_samples_from_all_classes(classes = classes, 
                                                                      cube_size = 20, 
                                                                      data = data, 
                                                                      ground_truth = ground_truth, 
                                                                      cubes = [], 
                                                                      output_class = [], 
                                                                      overlap_ratio = 1, 
                                                                      channels = 64)

Class Samples :  [1368, 523, 323, 730, 356, 837, 2224, 460, 1085]


In [12]:
# Sanity check: the per-label counts should match the "Class Samples" list printed above.
np.unique(output_class, return_counts = True)

(array([ 2,  3,  5,  6,  8, 10, 11, 12, 14], dtype=uint8),
 array([1368,  523,  323,  730,  356,  837, 2224,  460, 1085]))

In [19]:
# Split the extracted cubes and their labels by class, using a boolean mask over
# `output_class` to select the rows that belong to each label.
class_2_samples, class_2_labels = cubes[output_class == 2], output_class[output_class == 2]
class_3_samples, class_3_labels = cubes[output_class == 3], output_class[output_class == 3]
class_5_samples, class_5_labels = cubes[output_class == 5], output_class[output_class == 5]
class_6_samples, class_6_labels = cubes[output_class == 6], output_class[output_class == 6]
class_8_samples, class_8_labels = cubes[output_class == 8], output_class[output_class == 8]
class_10_samples, class_10_labels = cubes[output_class == 10], output_class[output_class == 10]
class_11_samples, class_11_labels = cubes[output_class == 11], output_class[output_class == 11]
class_12_samples, class_12_labels = cubes[output_class == 12], output_class[output_class == 12]
class_14_samples, class_14_labels = cubes[output_class == 14], output_class[output_class == 14]

# Per-class cubes, in ascending label order.
class_samples_collection = [
    class_2_samples,
    class_3_samples,
    class_5_samples,
    class_6_samples,
    class_8_samples,
    class_10_samples,
    class_11_samples,
    class_12_samples,
    class_14_samples,
]

# Per-class labels, in the same order as class_samples_collection.
class_labels_collection = [
    class_2_labels,
    class_3_labels,
    class_5_labels,
    class_6_labels,
    class_8_labels,
    class_10_labels,
    class_11_labels,
    class_12_labels,
    class_14_labels,
]


In [20]:
# Training & Test Set Arrays
# Per-class pieces are collected in lists and concatenated below.
X_train = []
X_test = []

y_train = []
y_test = []

# Get Training set size samples from each class
# The first 200 cubes of every class go to training, the remainder to test.
for samples in class_samples_collection:

    X_train.append(samples[0:200])

    X_test.append(samples[200:])

# Get output labels
# Same 200 / remainder split, so labels stay aligned with the cubes above.
for labels in class_labels_collection:
    y_train.append(labels[0:200])

    y_test.append(labels[200:])

# Stack the per-class pieces along the sample axis.
X_train = np.concatenate(X_train, axis = 0)
X_test = np.concatenate(X_test, axis = 0)

y_train = np.concatenate(y_train, axis = 0)
y_test = np.concatenate(y_test, axis = 0)

# Recorded output: 9 classes x 200 = 1800 training cubes and 6106 test cubes.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(1800, 20, 20, 64)
(6106, 20, 20, 64)
(1800,)
(6106,)


In [22]:
## Shuffle Training Set
# Shuffle an index array and apply it to both cubes and labels so the pairs stay aligned.
# NOTE(review): no random seed is set in the visible cells, so the order presumably
# differs between runs — confirm whether the imported module seeds NumPy.
samples_train = np.arange(X_train.shape[0])
np.random.shuffle(samples_train)

X_train = X_train[samples_train]
y_train = y_train[samples_train]


## Shuffle Test Set
samples_test = np.arange(X_test.shape[0])
np.random.shuffle(samples_test)

X_test = X_test[samples_test]
y_test = y_test[samples_test]

# Get counts(samples) of each class in test set
values_test_set, counts_test_set = np.unique(y_test, return_counts = True)
values_training_set, counts_training_set = np.unique(y_train, return_counts = True)


# Report the overall, training and test class distributions.
print("Samples per class: " + str(class_samples) + '\n'
      "Total number of samples is " + str(np.sum(class_samples)) + '.\n')

print("unique classes in training set: " + str(values_training_set) + '\n'
      "Total number of samples in training set is " + str(np.sum(counts_training_set)) + '.\n'
      "Samples per class in training set: " + str(counts_training_set) + '\n')

print("unique classes in test set: " + str(values_test_set) + '\n'
      "Total number of samples in test set is " + str(np.sum(counts_test_set)) + '.\n'
      "Samples per class in test set: " + str(counts_test_set) + '\n')
print('\n')


Samples per class: [1368, 523, 323, 730, 356, 837, 2224, 460, 1085]
Total number of samples is 7906.

unique classes in training set: [ 2  3  5  6  8 10 11 12 14]
Total number of samples in training set is 1800.
Samples per class in training set: [200 200 200 200 200 200 200 200 200]

unique classes in test set: [ 2  3  5  6  8 10 11 12 14]
Total number of samples in test set is 6106.
Samples per class in test set: [1168  323  123  530  156  637 2024  260  885]





In [21]:
# Check the training labels per class; the recorded output shows 200 for each of the 9 classes.
np.unique(y_train, return_counts = True)

(array([ 2,  3,  5,  6,  8, 10, 11, 12, 14], dtype=uint8),
 array([200, 200, 200, 200, 200, 200, 200, 200, 200]))

In [23]:
## one hot encode labels
onehot_encoder = OneHotEncoder(sparse = False)

y_train = y_train.reshape(len(y_train), 1)
y_test = y_test.reshape(len(y_test), 1)

y_train = onehot_encoder.fit_transform(y_train)
y_test = onehot_encoder.fit_transform(y_test)

In [24]:
# After one-hot encoding the label arrays gain one column per class (9 in the recorded output).
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(1800, 20, 20, 64)
(6106, 20, 20, 64)
(1800, 9)
(6106, 9)


## Train model with overlap ratio 1 for 5 epochs (200 samples from each class in training set)

In [10]:
# Train the source model on 20x20 cubes with 64 channels, 200 training samples per class,
# 5 epochs, batch size 20 and learning rate 1e-4.
# NOTE(review): `Training` is not defined in the visible cells; presumably it comes from the
# star import of source_model_utils_indian_pines — confirm its signature and return value there.
# NOTE(review): the recorded output below mentions 600 samples per class and classes 1..9,
# which does not match these arguments — it looks like output from an earlier run; re-run to refresh.
indian_pines__source_model_with_overlap_ratio_1_and_5_epochs = Training(training_set_size = [200],
                                                                 classes = classes,
                                                                 cube_size = 20,
                                                                 overlap_ratio = 1,
                                                                 data = data,
                                                                 ground_truth = ground_truth,
                                                                 batch_size = 20,
                                                                 channels = 64,
                                                                 epochs = 5,
                                                                 Verbosity = 1,
                                                                 accuracies = [],
                                                                 learning_rate = 0.0001)


Model training starts for data with 600 samples from each class in training set

Class Samples :  [5975, 15062, 1742, 2854, 1345, 5029, 1330, 3682, 940]
Samples per class: [5975, 15062, 1742, 2854, 1345, 5029, 1330, 3682, 940]
Total number of samples is 37959.

unique classes in training set: [1 2 3 4 5 6 7 8 9]
Total number of samples in training set is 5400.
Samples per class in training set: [600 600 600 600 600 600 600 600 600]

unique classes in test set: [1 2 3 4 5 6 7 8 9]
Total number of samples in test set is 32559.
Samples per class in test set: [ 5375 14462  1142  2254   745  4429   730  3082   340]



X_train => (5400, 20, 20, 64)
X_test  => (32559, 20, 20, 64)
y_train => (5400, 9)
y_test  => (32559, 9)

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 20, 20, 64)] 