In [None]:
import os
import numpy as np
import pandas as pd
from scipy.io import loadmat
from scipy.io import savemat
from pathlib import Path

# classification and visualization imports
import utils
import seaborn as sn
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix

In [None]:
# The spectral dataset must be stored in .mat files; the signatures matrix
# should be shaped (samples, wavelengths).

# Load from .mat

# Root folder for the datasets.
# os.path.join replaces the hand-written backslashes so the paths resolve on
# any OS and no longer depend on invalid escape sequences such as "\H" / "\M".
# Joining with a final "" keeps the trailing separator that the string
# concatenations below rely on.
pwdpath = os.getcwd()
root = os.path.join(pwdpath, "HSI_Files", "")

datasetName = "Corn20220623"

# Base names (without extension) of the data file and the label file.
im_, gt_ = f'{datasetName}Data', f'{datasetName}Labels'

# Results directory for this dataset (trailing separator preserved).
result = os.path.join(pwdpath, "Masters_Results", datasetName, "")

# Full paths of the two .mat files that are loaded later in the notebook.
hsi_path = {'img_path': f'{root}{im_}.mat', 'gt_path': f'{root}{gt_}.mat'}

In [None]:
# Read the previously saved band selections from "<datasetName>Bands.mat"
# (resolved relative to the current working directory).
saved_bands = loadmat(datasetName + "Bands.mat")

# Keep only the real variables: entries whose name contains "__" are the
# metadata fields loadmat adds to the returned dict, so they are skipped.
bands_selected = {
    name: np.array(values)
    for name, values in saved_bands.items()
    if "__" not in name
}
print(bands_selected)

In [None]:
# Random Forest grid-search setup.
# The pipeline min-max scales the features and then fits the forest; grid keys
# are prefixed with "classifier__" so they reach the pipeline's forest step.
steps_RF = [('scaler', MinMaxScaler()), ('classifier', RandomForestClassifier())]
pipe_RF = Pipeline(steps_RF)

n_estimators = [50, 100, 400]
# 'sqrt' is what the former 'auto' option meant for classifiers. 'auto' was
# deprecated in scikit-learn 1.1 and removed in 1.3, where it makes every
# candidate fit fail, so the explicit equivalent is used instead.
max_features = ['sqrt']
max_depth = [6, 30, 60]
min_samples_split = [2, 6, 10]
min_samples_leaf = [3]

param_grid_RF = dict(classifier__n_estimators = n_estimators,
                  classifier__max_features = max_features,
                  classifier__max_depth = max_depth,
                  classifier__min_samples_split = min_samples_split,
                  classifier__min_samples_leaf = min_samples_leaf
                  )

# Exhaustive search over the grid; the best candidate is refit on the full
# training data. The number of CV folds is left at scikit-learn's default.
RF = GridSearchCV(pipe_RF, param_grid=param_grid_RF,refit = True, n_jobs=4, verbose = 5)

In [None]:
# SVM grid-search setup.
# One sub-grid per kernel: 'degree' only affects the polynomial kernel, so
# crossing it with 'rbf' (as a single flat grid does) fits every RBF candidate
# three times with identical results. A list of grids searches the same
# distinct models without the redundant fits.
param_grid_SVM = [
    {'kernel': ['rbf'], 'C': [1000, 2000], 'gamma': [1]},
    {'kernel': ['poly'], 'C': [1000, 2000], 'gamma': [1], 'degree': [2, 5, 10]},
]

# The best candidate is refit on the full training data; the number of CV
# folds is left at scikit-learn's default.
SVM = GridSearchCV(svm.SVC(), param_grid=param_grid_SVM,refit = True, n_jobs=4, verbose = 5)

In [None]:
# Grid-search setup for the multi-layer perceptron.
# Candidates vary the hidden-layer layout and the L2 penalty (alpha); the
# activation is fixed to ReLU.
param_grid_MLP = dict(
    hidden_layer_sizes=[
        (500, 300, 100, 50),
        (1000, 500, 300, 100, 50),
        (2000, 500, 50),
        (2000, 500, 250, 100, 50, 20),
    ],
    activation=['relu'],
    alpha=[1e-8, 1e-7, 1e-6],
)

# The best candidate is refit on the full training data once the search ends.
MLP = GridSearchCV(
    estimator=MLPClassifier(),
    param_grid=param_grid_MLP,
    refit=True,
    n_jobs=4,
    verbose=5,
)

In [None]:
def _first_mat_variable(mat_path):
    """Return the first data variable stored in a .mat file as a NumPy array.

    Entries whose name contains "__" are the metadata fields that loadmat
    adds to the returned dict, so they are skipped.

    Raises:
        KeyError: if the file holds no data variable at all.
    """
    contents = loadmat(mat_path)
    for name, value in contents.items():
        if "__" not in name:
            return np.array(value)
    raise KeyError(f"no data variable found in '{mat_path}'")


# Loading data and labels
Labels = _first_mat_variable(hsi_path['gt_path'])

# Store the labels as a column, i.e. one row per sample.
if Labels.shape[0] < Labels.shape[1]:
    Labels = Labels.T

Data = _first_mat_variable(hsi_path['img_path'])

# Orient the data as (samples, wavelengths) so its rows line up with Labels.
if Data.shape[0] != Labels.shape[0]:
    Data = Data.T

# Fail early with a clear message instead of deep inside train_test_split.
if Data.shape[0] != Labels.shape[0]:
    raise ValueError(
        f"Data {Data.shape} and Labels {Labels.shape} do not share a sample count"
    )

# All samples are kept. To classify with only two classes (ignoring the middle
# ones, e.g. keeping labels 1 and 4), mask the rows of Data and Labels here.

# Registry of the classifier objects to evaluate, keyed by a short name.
# Every entry must provide .fit and .predict methods.
Classifiers = {"RF": RF, "SVM": SVM, "MLP": MLP}

# Result containers, indexed as [classifier name][band-selection algorithm].
# Each leaf starts as its own empty list.
Accuracy_values = {
    clf_name: {band_alg: [] for band_alg in bands_selected}
    for clf_name in Classifiers
}
Best_parameters = {
    clf_name: {band_alg: [] for band_alg in bands_selected}
    for clf_name in Classifiers
}



In [None]:
# Experiment settings.
# NOTE(review): `iter` shadows the built-in iter(); it is not used in this
# cell and the name is kept in case later cells reference it.
iter = 8
factor = 4

# Band-selection (reduction) algorithm to evaluate.
# Alternatives that were tried: "ocf", "bombs".
alg = "svd"

# Number of bands to keep.
b_count = 20

# Row of the saved band matrix to read. Presumably row k holds the selection
# for (k + 1) * factor bands -- TODO confirm against the notebook that saved
# the bands.
i = int(b_count / factor - 1)

# Keep only the selected band columns of the dataset.
DataSelected = Data[:, bands_selected[alg][i, :b_count]]

# Stratified 80/20 train/test split with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    DataSelected,
    Labels,
    test_size=0.2,
    stratify=Labels,
    random_state=11,
)



In [None]:
# Run the Random Forest grid search on the training split (4 parallel jobs,
# verbose progress output). ravel() flattens the label array to the 1-D
# shape that fit() expects.
RF.fit(X_train, y_train.ravel())

In [None]:
# Hyper-parameter combination that the Random Forest search selected.
RF.best_params_


In [None]:
# Cross-validation score of the selected Random Forest candidate.
RF.best_score_

In [None]:
# Run the SVM grid search on the training split (4 parallel jobs, verbose
# progress output). ravel() flattens the label array to the 1-D shape that
# fit() expects.
SVM.fit(X_train, y_train.ravel())

In [None]:
# Hyper-parameter combination that the SVM search selected.
SVM.best_params_

In [None]:
# Cross-validation score of the selected SVM candidate.
SVM.best_score_

In [None]:
# Run the MLP grid search on the training split (4 parallel jobs, verbose
# progress output). ravel() flattens the label array to the 1-D shape that
# fit() expects.
MLP.fit(X_train, y_train.ravel())

In [None]:
# Hyper-parameter combination that the MLP search selected.
MLP.best_params_


In [None]:
# Cross-validation score of the selected MLP candidate.
MLP.best_score_