In [2]:
import numpy as np
import random
import heapq
from deap import base, creator, tools, algorithms
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score, mutual_info_score
from skimage.metrics import structural_similarity as SSIM

In [3]:
# Load the dataset
image = np.load(r"C:\Users\NNadi\Downloads\Band_Selection_SR-NMI-VI\cubert_drone_data\images.npy")  # Hyperspectral data (1217 × 303 × 274)
labels = np.load(r"C:\Users\NNadi\Downloads\Band_Selection_SR-NMI-VI\cubert_drone_data\labels.npy")  # Ground truth (1217 × 303)
print(image.shape)
print(labels.shape)
num_bands = image.shape[-1]  
optimized_bands = [4, 136, 41, 106, 43, 46, 15, 47, 112, 50, 52, 23, 121, 62, 57]
optimized_image = image[:, :, optimized_bands]
np.save(r'C:\Users\NNadi\Downloads\images.npy', optimized_image)

print("Optimized image shape:", optimized_image.shape)


(1001, 1001, 138)
(1001, 1001)
Optimized image shape: (1001, 1001, 15)


In [4]:
t=np.load(r'C:\Users\NNadi\Downloads\images.npy')
print(t.shape)

(1001, 1001, 15)


In [3]:
selected_region = image[0:128, 0:128, :]
nmi_matrix = np.zeros((num_bands, num_bands))
ssim_matrix = np.zeros((num_bands, num_bands))
dissimilarity_matrix = np.zeros((num_bands, num_bands))

In [4]:
def calculate_entropy(data):
    _, counts = np.unique(data, return_counts=True)
    probabilities = counts / counts.sum()
    if len(probabilities) == 1:
        return 0
    return -np.sum(probabilities * np.log2(probabilities))
def discretize_band(band, num_bins=256):
    band_min = np.min(band)
    band_max = np.max(band)
    bins = np.linspace(band_min, band_max, num_bins)
    discrete_band = np.digitize(band, bins) - 1 
    return discrete_band
for i in range(num_bands):
    for j in range(i + 1, num_bands):
        band_i = discretize_band(selected_region[:, :, i].flatten())
        band_j = discretize_band(selected_region[:, :, j].flatten())
        if len(np.unique(band_i)) > 1 and len(np.unique(band_j)) > 1:
            mi = mutual_info_score(band_i, band_j)
            h_i = calculate_entropy(band_i)
            h_j = calculate_entropy(band_j)
            if h_i > 0 and h_j > 0:
                dissimilarity_matrix[i, j] = h_i + h_j - 2 * mi
                dissimilarity_matrix[j, i] = dissimilarity_matrix[i, j]
                nmi_matrix[i, j] = 2 * mi / (h_i + h_j)
                nmi_matrix[j, i] = nmi_matrix[i, j]
            ssim_value = SSIM(
                selected_region[:, :, i],
                selected_region[:, :, j],
                data_range=selected_region[:, :, i].max() - selected_region[:, :, i].min()
            )
            ssim_matrix[i, j] = ssim_value
            ssim_matrix[j, i] = ssim_value
hybrid_similarity_matrix = nmi_matrix + ssim_matrix
def rank_bands(sim_matrix, dis_matrix, num_bands_to_select=50):
    avg_similarity = np.nanmean(sim_matrix, axis=1)
    dissimilarity = np.nanmin(dis_matrix, axis=1)
    range_similarity = avg_similarity.max() - avg_similarity.min()
    range_dissimilarity = dissimilarity.max() - dissimilarity.min()
    normalized_similarity = (avg_similarity - avg_similarity.min()) / range_similarity if range_similarity != 0 else np.zeros_like(avg_similarity)
    normalized_dissimilarity = (dissimilarity - dissimilarity.min()) / range_dissimilarity if range_dissimilarity != 0 else np.zeros_like(dissimilarity)
    scores = normalized_similarity * normalized_dissimilarity
    ranked_indices = np.argsort(scores)[::-1]
    return ranked_indices[:num_bands_to_select]
num_bands_to_select = 100
sr_nmi_vi_ranked_bands = rank_bands(nmi_matrix, dissimilarity_matrix, num_bands_to_select)

print(f"Top {num_bands_to_select} SR-NMI-VI Ranked Bands: {sr_nmi_vi_ranked_bands}")


Top 100 SR-NMI-VI Ranked Bands: [137  43  49  48  47  46  45  44  42  34  41  40  39  38  37  36  50  51
  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  35  33 136
   8  14  13  12  11  10   9   7  32   6   5   4   3   2   1  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  67  68  69 112
 118 117 116 115 114 113 111  70 110 109 108 107 106 105 119 120 121 122
 123 124 125 126 127 128 129 130 131 132]


In [5]:
labels_flat = labels.flatten()
labels_flat=labels_flat[0:4000]
unique_classes, class_counts = np.unique(labels_flat, return_counts=True)
for cls, count in zip(unique_classes, class_counts):
    print(f"Class {cls}: {count} samples")

Class 0.0: 1253 samples
Class 1.0: 245 samples
Class 2.0: 541 samples
Class 3.0: 1961 samples


In [6]:
import numpy as np
import random
from deap import base, creator, tools, algorithms
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score

POP_SIZE = 50   
N_GEN = 50     
MUTATION_RATE = 0.2
CROSSOVER_RATE = 0.7
MIN_BANDS = 10  
MAX_BANDS = 20 
sr_nmi_vi_ranked_bands = sr_nmi_vi_ranked_bands.tolist() if isinstance(sr_nmi_vi_ranked_bands, np.ndarray) else sr_nmi_vi_ranked_bands
try:
    del creator.FitnessMax
    del creator.Individual
except AttributeError:
    pass

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

# Initialize an individual with a random number of bands between MIN_BANDS and MAX_BANDS
def init_individual():
    num_selected_bands = random.randint(MIN_BANDS, MAX_BANDS)
    return creator.Individual(random.sample(sr_nmi_vi_ranked_bands, num_selected_bands))

toolbox = base.Toolbox()
toolbox.register("individual", init_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Mutation: Replace one band while maintaining the length constraint
def mutate_individual(individual):
    if random.random() < MUTATION_RATE:
        idx_to_replace = random.randint(0, len(individual) - 1)
        available_bands = list(set(sr_nmi_vi_ranked_bands) - set(individual))
        if available_bands:
            individual[idx_to_replace] = random.choice(available_bands)
    return (individual,)

# Crossover: Ensure the number of bands remains within constraints
def crossover(parent1, parent2):
    crossover_point = random.randint(1, min(len(parent1), len(parent2)) - 1)
    
    child1_genes = list(set(parent1[:crossover_point] + parent2[crossover_point:]))
    child2_genes = list(set(parent2[:crossover_point] + parent1[crossover_point:]))

    # Ensure valid band count
    child1_genes = child1_genes[:MAX_BANDS] if len(child1_genes) > MAX_BANDS else child1_genes
    child2_genes = child2_genes[:MAX_BANDS] if len(child2_genes) > MAX_BANDS else child2_genes
    
    # Fill up to MIN_BANDS if needed
    while len(child1_genes) < MIN_BANDS:
        new_band = random.choice(sr_nmi_vi_ranked_bands)
        if new_band not in child1_genes:
            child1_genes.append(new_band)
    
    while len(child2_genes) < MIN_BANDS:
        new_band = random.choice(sr_nmi_vi_ranked_bands)
        if new_band not in child2_genes:
            child2_genes.append(new_band)

    return creator.Individual(child1_genes), creator.Individual(child2_genes)

# Fitness function using SVM classification
def evaluate(individual):
    selected_indices = list(individual)
    X = image.reshape(-1, num_bands)[:, selected_indices]
    y = labels.ravel()
    X = X[0:5000, :]
    y = y[0:5000]
    unique_classes = np.unique(y)
    if len(unique_classes) < 2:
        return (0,)  

    try:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)
    except ValueError:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    if len(np.unique(y_train)) < 2:
        return (0,)
    
    clf = SVC()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    kappa = cohen_kappa_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")

    return (acc + kappa + f1,)

toolbox.register("mate", crossover)
toolbox.register("mutate", mutate_individual)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evaluate)

# Run Genetic Algorithm
def run_ga():
    pop = toolbox.population(n=POP_SIZE)
    hof = tools.HallOfFame(1)  
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("max", np.max)
    
    algorithms.eaSimple(pop, toolbox, cxpb=CROSSOVER_RATE, mutpb=MUTATION_RATE, 
                        ngen=N_GEN, stats=stats, halloffame=hof, verbose=True)

    return hof[0]  

best_individual = run_ga()
print(f"Optimized Bands (GA-Wrapped SR-NMI-VI): {best_individual}")


gen	nevals	max    
0  	50    	2.98327
1  	38    	2.98327
2  	34    	2.9857 
3  	33    	2.98569
4  	31    	2.98569
5  	36    	2.98806
6  	39    	2.98566
7  	30    	2.98566
8  	34    	2.98566
9  	35    	2.98566
10 	40    	2.98567
11 	33    	2.98567
12 	43    	2.98327
13 	40    	2.98092
14 	39    	2.98807
15 	37    	2.98807
16 	35    	2.98329
17 	41    	2.98569
18 	41    	2.98808
19 	39    	2.99045
20 	37    	2.99045
21 	40    	2.99045
22 	40    	2.99284
23 	32    	2.99284
24 	37    	2.99284
25 	38    	2.99284
26 	38    	2.99284
27 	39    	2.99284
28 	41    	2.99047
29 	41    	2.99047
30 	30    	2.99047
31 	45    	2.99047
32 	41    	2.99047
33 	35    	2.98806
34 	45    	2.98329
35 	44    	2.99045
36 	34    	2.98566
37 	32    	2.98327
38 	36    	2.98566
39 	40    	2.98806
40 	33    	2.99284
41 	40    	2.98807
42 	36    	2.98807
43 	41    	2.98567
44 	35    	2.98806
45 	41    	2.99045
46 	36    	2.99045
47 	38    	2.99045
48 	43    	2.99045
49 	33    	2.99045
50 	38    	2.99045
Optimized Ba

In [7]:

X = image.reshape(-1, num_bands)[:, best_individual]
y = labels.ravel()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)
classifiers = {
    "Decision Tree": DecisionTreeClassifier(),
    "Naïve Bayes": GaussianNB(),
    "SVM": SVC(),
    "Random Forest": RandomForestClassifier(),
}

for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    kappa = cohen_kappa_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")
    print(f"{name} - Accuracy: {acc:.4f}, Kappa: {kappa:.4f}, F1-score: {f1:.4f}")


Decision Tree - Accuracy: 0.9950, Kappa: 0.9923, F1-score: 0.9950
Naïve Bayes - Accuracy: 0.7507, Kappa: 0.6391, F1-score: 0.7614
SVM - Accuracy: 0.9951, Kappa: 0.9925, F1-score: 0.9951
Random Forest - Accuracy: 0.9980, Kappa: 0.9969, F1-score: 0.9980


In [None]:
Decision Tree - Accuracy: 0.9920, Kappa: 0.9877, F1-score: 0.9920
Naïve Bayes - Accuracy: 0.7230, Kappa: 0.6009, F1-score: 0.7316
SVM - Accuracy: 0.9888, Kappa: 0.9829, F1-score: 0.9888
Random Forest - Accuracy: 0.9971, Kappa: 0.9956, F1-score: 0.9971