In [None]:
import sys

sys.path.append("/home/sayem/Desktop/deepHSI")  # Adjust to your project root path

from pathlib import Path
import numpy as np

# Custom module imports
from src.dataset.components.hyperspectral_dataset import HyperspectralDataset
from src.dataset.components.utils import *
from src.dataset.medical_datasets.bloodHSI import BloodDetectionHSIDataModule
from src.dataset.remote_sensing_datasets.paviaC import PaviaCDataModule
from src.dataset.remote_sensing_datasets.ksc import KSCDataModule
from src.models.hsi_classification_module import HSIClassificationLitModule
from src.models.components.simple_dense_net import HSIFCModel

# PyTorch and metrics imports
import torch
from torchmetrics import Precision, Recall, F1Score

# Importing from `lightning` instead of `pytorch_lightning`
import lightning as L
# from lightning import Trainer

# Set PyTorch's float32 matrix-multiplication precision setting to 'medium'.
# NOTE(review): per the PyTorch docs this allows lower-precision internal math
# in exchange for speed on supporting hardware — confirm that is acceptable here.
torch.set_float32_matmul_precision('medium')

In [None]:
from scipy.io import loadmat

# Path to the .mat file (the Pavia ground-truth file, judging by its name)
mat_file_path = '/home/sayem/Desktop/deepHSI/data/PaviaC/PaviaC/Pavia_gt.mat'

# Load the .mat file; scipy's loadmat returns a dict keyed by variable name
data = loadmat(mat_file_path)


# Display the loaded dict to inspect which variables the file contains
data

In [None]:
# --- Experiment configuration ---------------------------------------------
# Root folder that holds (or will receive) the datasets
data_dir = '/home/sayem/Desktop/deepHSI/data'

# Settings shared by the data module, the network and the metrics
hyperparams = dict(
    batch_size=32,
    num_workers=20,
    patch_size=5,
    center_pixel=True,
    supervision="full",
    num_classes=10,  # class count handed to the model, the metrics and the Lightning module
)

# Number of input channels given to the network
input_channels = 102

# Macro-averaged multiclass metrics, all constructed with identical arguments
custom_metrics = {
    metric_name: metric_cls(
        num_classes=hyperparams["num_classes"], average='macro', task='multiclass'
    )
    for metric_name, metric_cls in (
        ("precision", Precision),
        ("recall", Recall),
        ("f1", F1Score),
    )
}

# Network: patch size and class count are read from hyperparams
model = HSIFCModel(
    input_channels=input_channels,
    patch_size=hyperparams["patch_size"],
    n_classes=hyperparams["num_classes"],
    dropout=True,
)

# Lightning wrapper around the network, optimised with Adam (lr=1e-3)
hsi_module = HSIClassificationLitModule(
    net=model,
    optimizer_cls=torch.optim.Adam,
    optimizer_params=dict(lr=1e-3),
    num_classes=hyperparams["num_classes"],
    custom_metrics=custom_metrics,
)

max_epochs = 15

# Full training run (fast_dev_run is switched off) with 16-bit mixed precision on GPU
trainer = L.Trainer(
    max_epochs=max_epochs,
    accelerator='gpu',
    precision='16-mixed',
    fast_dev_run=False,
)

# Data module; prepare_data()/setup() are not called explicitly in this cell
pavia_c_datamodule = PaviaCDataModule(data_dir=data_dir, hyperparams=hyperparams)

In [None]:
# Fit the model; the data module is handed to the Trainer via `datamodule=`.
# Earlier variant that passed the training dataloader directly:
# trainer.fit(hsi_module, pavia_c_datamodule.train_dataloader())
trainer.fit(hsi_module, datamodule=pavia_c_datamodule)

In [None]:
# NOTE(review): `STOP` is not defined anywhere in the visible notebook, so this
# presumably raises NameError on purpose to halt "Run All" at this point — confirm.
STOP

In [None]:
# Root directory where the datasets are stored / downloaded
data_dir = '/home/sayem/Desktop/deepHSI/data'

# Data-module settings. "num_classes" is included (same value as the training
# configuration cell near the top of this notebook) because later cells read
# hyperparams["num_classes"] and would otherwise fail with a KeyError once this
# cell has replaced the earlier dictionary.
hyperparams = {
    "batch_size": 32,
    "num_workers": 3,
    "patch_size": 5,
    "center_pixel": True,
    "supervision": "full",
    "num_classes": 10,
}

# Initialize the PaviaCDataModule with the arguments above
pavia_c_datamodule = PaviaCDataModule(
    data_dir=data_dir,
    hyperparams=hyperparams
)

# Prepare and set up the data module explicitly so the datasets can be
# inspected without running a Trainer
pavia_c_datamodule.prepare_data()
pavia_c_datamodule.setup(stage='fit')

# Both the train and validation datasets must be truthy after setup
assert pavia_c_datamodule.train_dataset and pavia_c_datamodule.val_dataset, \
    "Datasets not initialized properly."

In [None]:
# Build the three loaders exposed by the data module
train_dataloader, val_dataloader, test_dataloader = (
    pavia_c_datamodule.train_dataloader(),
    pavia_c_datamodule.val_dataloader(),
    pavia_c_datamodule.test_dataloader(),
)

# Sanity check: pull only the first training batch and print its shapes
batch = next(iter(train_dataloader), None)
if batch is not None:
    x, y = batch
    print(x.shape, y.shape)

In [None]:
# NOTE(review): `STOP` is not defined anywhere in the visible notebook, so this
# presumably raises NameError on purpose to halt "Run All" at this point — confirm.
STOP

In [None]:
import torch
from torchmetrics import Precision, Recall, F1Score
from pytorch_lightning import Trainer
from src.models.hsi_classification_module import HSIClassificationLitModule
from src.models.components.simple_dense_net import HSIFCModel

torch.set_float32_matmul_precision('medium')

# The PaviaC data module is trained on below, so use the same channel count as
# the PaviaC training cell near the top of this notebook (102, not 103).
input_channels = 102

# `hyperparams` may have been redefined without "num_classes" by the data-module
# cell; fall back to the value used by the first training configuration.
num_classes = hyperparams.get("num_classes", 10)

# Macro-averaged multiclass metrics for the classification task
custom_metrics = {
    "precision": Precision(num_classes=num_classes, average='macro', task='multiclass'),
    "recall": Recall(num_classes=num_classes, average='macro', task='multiclass'),
    "f1": F1Score(num_classes=num_classes, average='macro', task='multiclass')
}

model = HSIFCModel(
    input_channels=input_channels,
    patch_size=hyperparams["patch_size"],
    n_classes=num_classes,
    dropout=True,
)

# Keyword `net=` matches the call in the first training cell of this notebook
# (the original `model=` here disagreed with it).
hsi_module = HSIClassificationLitModule(
    net=model,
    optimizer_cls=torch.optim.Adam,
    optimizer_params={"lr": 1e-3},
    num_classes=num_classes,
    custom_metrics=custom_metrics
)

# Use the Trainer from the `lightning` package (imported as `L` at the top of
# the notebook) so the Trainer and the module come from the same package;
# a Trainer from `pytorch_lightning` does not accept `lightning` modules.
# NOTE(review): assumes HSIClassificationLitModule derives from `lightning` — confirm.
trainer = L.Trainer(
    max_epochs=10,
    precision='16-mixed',
    accelerator='gpu',
    devices=1,
)

# Hand over the data module (not the raw Dataset) so Lightning builds the
# batched train/val dataloaders itself.
trainer.fit(hsi_module, datamodule=pavia_c_datamodule)

In [None]:
# NOTE(review): `STOP` is not defined anywhere in the visible notebook, so this
# presumably raises NameError on purpose to halt "Run All" at this point — confirm.
STOP

In [None]:
from spectral import *
import scipy.io

# Load the hyperspectral image stored under the 'paviaU' key of the .mat file
paviaU_path = '/home/sayem/Desktop/deepHSI/data/PaviaU/PaviaU/PaviaU.mat'
paviaU_data = scipy.io.loadmat(paviaU_path)['paviaU']

print(paviaU_data.shape)

# Load the ground truth stored under the 'paviaU_gt' key
paviaU_gt_path = '/home/sayem/Desktop/deepHSI/data/PaviaU/PaviaU/PaviaU_gt.mat'
paviaU_gt = scipy.io.loadmat(paviaU_gt_path)['paviaU_gt']


# Show bands (55, 41, 12) of the image, passing the ground truth as class labels
view = imshow(paviaU_data, (55, 41, 12), classes=paviaU_gt)
view.set_display_mode('overlay')  # draw the class labels on top of the image
view.class_alpha = 0.5  # alpha value used for the class overlay

In [None]:
# Location of the datasets on disk
data_dir = '/home/sayem/Desktop/deepHSI/data'

batch_size = 1

# Patch settings, forwarded to the data module as individual keyword arguments
hyperparams = dict(patch_size=5, center_pixel=True, supervision="full")

ksc_datamodule = KSCDataModule(
    data_dir=data_dir,
    batch_size=batch_size,
    **hyperparams,
)

ksc_datamodule.prepare_data()
ksc_datamodule.setup(stage='fit')

# Both splits must be truthy after setup; checked in the same order as before
assert ksc_datamodule.train_dataset, "Datasets not initialized properly."
assert ksc_datamodule.val_dataset, "Datasets not initialized properly."

In [None]:
from spectral import *
import scipy.io

# Load the hyperspectral image
ksc_data_path = '/home/sayem/Desktop/deepHSI/data/KSC/KSC/KSC.mat'  # machine-specific absolute path
ksc_data = scipy.io.loadmat(ksc_data_path)['KSC']  # the array is read from the 'KSC' variable of the MAT file

print(ksc_data.shape)

# Load the ground truth
ksc_gt_path = '/home/sayem/Desktop/deepHSI/data/KSC/KSC/KSC_gt.mat'  # machine-specific absolute path
ksc_gt = scipy.io.loadmat(ksc_gt_path)['KSC_gt']  # the array is read from the 'KSC_gt' variable of the MAT file

# Display the hyperspectral image with the ground truth overlay,
# using bands (43, 21, 11) as the three display channels
view = imshow(ksc_data, (43, 21, 11), classes=ksc_gt)
view.set_display_mode('overlay')
view.class_alpha = 0.5

In [None]:
# Requires `ksc_gt` (a numpy array) to have been loaded by an earlier cell
import numpy as np
unique_values = np.unique(ksc_gt)  # sorted distinct label values present in the ground truth

# Display the distinct label values
unique_values

In [None]:
# Tally how many entries of the KSC ground truth carry each label value
unique_values, counts = np.unique(ksc_gt, return_counts=True)

# Pair every label value with its count for display
unique_values_counts = [
    (label_value, n_entries) for label_value, n_entries in zip(unique_values, counts)
]

unique_values_counts

In [None]:
# NOTE(review): `STOP` is not defined anywhere in the visible notebook, so this
# presumably raises NameError on purpose to halt "Run All" at this point — confirm.
STOP

In [None]:
# Parameters for the blood-detection data module
data_dir = '/home/sayem/Desktop/deepHSI/data'  # directory the data is downloaded to
doi = '10.5281/zenodo.3984905'  # DOI of the Zenodo dataset
batch_size = 32
patch_size = 5

# Instantiate the data module (arguments are passed positionally)
blood_detection_data_module = BloodDetectionHSIDataModule(
    data_dir, doi, batch_size, patch_size
)

# Prepare the data, then set up the datasets for the 'fit' stage
blood_detection_data_module.prepare_data()
blood_detection_data_module.setup(stage='fit')

# Dataloaders provided by the data module
train_dataloader = blood_detection_data_module.train_dataloader()
val_dataloader = blood_detection_data_module.val_dataloader()

# Inspect only the first training batch: x is the input data, y the labels
batch = next(iter(train_dataloader), None)
if batch is not None:
    x, y = batch
    print(x.shape)
    print(y.shape)

In [None]:
from spectral import *
import spectral.io.envi as envi

# Base name of the capture; it is interpolated into the file paths below
name = 'D_1'

# The image consists of a data file (.float) and a header file (.hdr)
float_file = f'/home/sayem/Desktop/deepHSI/data/BloodDetectionHSI/HyperBlood/data/{name}.float'
hrd_file = f'/home/sayem/Desktop/deepHSI/data/BloodDetectionHSI/HyperBlood/data/{name}.hdr'

# Open the image via its header, passing the data file explicitly
hs_image = envi.open(hrd_file, float_file)

# Load the image data from the opened file
image_data = hs_image.load()

# Display the shape of the loaded image data
image_data.shape

In [None]:
from spectral import imshow

band_number = 41  # 1-based band to view; the slice below converts it to a 0-based index
view = imshow(image_data[:, :, band_number-1])

In [None]:
# Band indices handed to imshow via `bands=` for the three display channels
rgb_bands = (51, 41, 9)
view = imshow(image_data, bands=rgb_bands)

In [None]:
# Annotation archive for the capture identified by `name`
anno_file = f'/home/sayem/Desktop/deepHSI/data/BloodDetectionHSI/HyperBlood/anno/{name}.npz'

# Open the .npz archive and keep the first array it yields as the annotation
with np.load(anno_file) as data:
    annotation = next(iter(data.values()))
    print(f"Loaded annotation data.")

In [None]:
# Display the shape of the annotation array
annotation.shape

In [None]:
# Show the image using `rgb_bands`, passing the annotation as class labels
view = imshow(image_data, rgb_bands, classes=annotation)

# Draw the class labels on top of the image
view.set_display_mode('overlay')

# Alpha value used for the class overlay
view.class_alpha = 0.5

In [None]:
# Requires `annotation` (a numpy array) to have been loaded by an earlier cell
import numpy as np
unique_values = np.unique(annotation)  # sorted distinct values present in the annotation

# Display the distinct annotation values
unique_values

In [None]:
# Tally how many entries of the annotation carry each value
unique_values, counts = np.unique(annotation, return_counts=True)

# Pair every value with its count for display
unique_values_counts = [
    (label_value, n_entries) for label_value, n_entries in zip(unique_values, counts)
]

unique_values_counts

In [None]:
# NOTE(review): `STOP` is not defined anywhere in the visible notebook, so this
# presumably raises NameError on purpose to halt "Run All" at this point — confirm.
STOP

In [None]:
# Which dataset to work with and where it lives on disk
dataset_name = "KSC"
data_dir = Path("/home/sayem/Desktop/deepHSI/data")
target_folder = data_dir.joinpath(dataset_name)
target_folder.mkdir(parents=True, exist_ok=True)

# Both helpers receive the folder as a plain string
dataset_folder = str(target_folder)

# Fetch the dataset files into the target folder
download_dataset(dataset_name, dataset_folder)

# load_dataset yields six values; the last one is not used here
loaded = load_dataset(dataset_name, dataset_folder)
img, gt, label_values, ignored_labels, rgb_bands, _ = loaded

# Keyword arguments forwarded to the dataset constructor
hyperparams = dict(
    dataset=dataset_name,
    patch_size=4,
    ignored_labels=ignored_labels,
    center_pixel=True,
    supervision="full",
)

dataset = HyperspectralDataset(img, gt, transform=None, **hyperparams)

In [None]:
# Display the shape of the loaded image array
img.shape

In [None]:
# Fetch the first item of the dataset and unpack it into an (image, label) pair
image, label = dataset[0]

In [None]:
# Display the shape of `image`
image.shape

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Show one spectral band of `img` (bands are indexed on the third axis)
# as a grayscale image with an intensity colorbar.
band_index = 99  # zero-based index, i.e. the 100th band
band_to_plot = img[:, :, band_index]

fig, ax = plt.subplots()
band_image = ax.imshow(band_to_plot, cmap='gray')
fig.colorbar(band_image, ax=ax)  # intensity scale of the band
ax.set_title('Band 100')
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
import subprocess

# Export every band of `img` (third axis) as a PNG file into a sub-folder.
subfolder = 'bands'
os.makedirs(subfolder, exist_ok=True)  # no error if the folder already exists

# band_no is 1-based: it names the file and the title; the slice uses band_no - 1
for band_no in range(1, img.shape[2] + 1):
    band_to_plot = img[:, :, band_no - 1]

    # One fresh figure per band, drawn as a heatmap with a colorbar
    fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
    heatmap = ax.imshow(band_to_plot, cmap='coolwarm', aspect='auto')
    fig.colorbar(heatmap, ax=ax)
    ax.set_title(f'Band {band_no}')
    ax.axis('off')

    # Write the figure to <subfolder>/<band_no>.png, then release it
    png_filename = os.path.join(subfolder, f'{band_no}.png')
    fig.savefig(png_filename, bbox_inches='tight', pad_inches=0, dpi=100)
    plt.close(fig)

In [None]:
# NOTE(review): `STOP` is not defined anywhere in the visible notebook, so this
# presumably raises NameError on purpose to halt "Run All" at this point — confirm.
STOP

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Visualise the 2D ground-truth array `gt` with one discrete colour per label value.
#
# The number of label values is derived from `gt` instead of being hard-coded
# to 16: `gt` is loaded for whichever `dataset_name` is configured, and a fixed
# vmax=15 both invents colorbar entries for labels that do not exist and
# collapses any label above 15 into the last colour.
# NOTE(review): assumes labels are non-negative integers starting at 0 — confirm.
n_labels = int(gt.max()) + 1  # label values run from 0 to gt.max()

# Explicit figure/axis for better control; larger figure for clarity
fig, ax = plt.subplots(figsize=(12, 8))

# Discrete colormap with one entry per label value
# ('tab20' only provides 20 distinct colours, so they repeat beyond that)
cmap = plt.get_cmap('tab20', n_labels)

# vmin/vmax span exactly the label range so each value maps to its own colour
im = ax.imshow(gt, cmap=cmap, vmin=0, vmax=n_labels - 1)

# Colorbar with one tick per label value; boundaries sit halfway between labels
cbar = fig.colorbar(
    im,
    ax=ax,
    boundaries=np.arange(-0.5, n_labels, 1),
    ticks=np.arange(0, n_labels),
    fraction=0.02,
    pad=0.04,
)
cbar.set_label('Class Labels')
cbar.set_ticklabels([f'Class {i}' for i in range(n_labels)])

# Title and axis labels; the count in the title follows the data
ax.set_title(f'Ground Truth Visualization with {n_labels} Label Values')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Line Index')

plt.show()

In [None]:
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# PCA dimensionality reduction of the hyperspectral cube.
# `img` is unpacked as (lines, samples, bands).
lines, samples, bands = img.shape

# Flatten the two spatial axes so that every row holds one pixel's spectrum
img_reshaped = img.reshape(-1, bands)

# Reduce the band axis to 10 principal components
pca = PCA(n_components=10)
img_pca = pca.fit_transform(img_reshaped)

# Restore the spatial layout: (lines, samples, n_components)
img_pca_reshaped = img_pca.reshape(lines, samples, -1)

print(img_pca_reshaped.shape)

# Show the first principal component as a grayscale image
fig, ax = plt.subplots()
pc_image = ax.imshow(img_pca_reshaped[:, :, 0], cmap='gray')
fig.colorbar(pc_image, ax=ax)
ax.set_title('First Principal Component')
plt.show()

# img_pca_reshaped is available for further analysis or classification

# You can now use img_pca_reshaped for further analysis or classification


In [None]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# PCA + kNN pixel classification with a grid search over both hyperparameters.
# Uses `img` (lines, samples, bands), the label map `gt` (lines, samples) and
# `ignored_labels`, all produced by the dataset-loading cell.

# Reshape the image to a 2D array (pixels, bands)
lines, samples, bands = img.shape
img_reshaped = img.reshape((lines * samples, bands))

# Also, reshape the labels to a 1D array
labels_reshaped = gt.reshape((lines * samples,))

# Exclude pixels whose label is listed in `ignored_labels`: training and
# scoring on them would treat "no annotation" as a class of its own and
# distort the reported accuracy. `img_reshaped` itself is left untouched so
# later cells can still reshape it back to the full image.
# NOTE(review): assumes `ignored_labels` holds the label values to skip — confirm.
labeled_mask = ~np.isin(labels_reshaped, ignored_labels)
X_labeled = img_reshaped[labeled_mask]
y_labeled = labels_reshaped[labeled_mask]

# Split the labelled pixels into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_labeled, y_labeled, test_size=0.3, random_state=42
)

# Create a pipeline that includes PCA and KNN classifier
pipeline = Pipeline([
    ('pca', PCA()),
    ('knn', KNeighborsClassifier())
])

# Define the parameter grid for hyperparameter tuning
param_grid = {
    'pca__n_components': [50, 100, 150],  # Number of principal components to try
    'knn__n_neighbors': [3, 5, 7]  # Number of neighbors for KNN
}

# Initialize GridSearchCV with the pipeline and parameter grid
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy')

# Fit GridSearchCV on the training data
grid_search.fit(X_train, y_train)

# Print the best parameters and the corresponding score
print(f'Best parameters: {grid_search.best_params_}')
print(f'Best cross-validation score: {grid_search.best_score_:.2f}')

# Use the best estimator to make predictions on the test set
y_pred = grid_search.predict(X_test)

# Calculate the accuracy of the best estimator on the test set
accuracy = accuracy_score(y_test, y_pred)
print(f'Test set accuracy: {accuracy * 100:.2f}%')

In [None]:
# Refit PCA on all pixels using the component count chosen by the grid search
n_best_components = grid_search.best_params_['pca__n_components']
best_pca = PCA(n_components=n_best_components)
img_pca_full = best_pca.fit_transform(img_reshaped)

# Take the first principal component and restore the spatial layout
first_pc_full = img_pca_full[:, 0]
first_pc_reshaped = first_pc_full.reshape(lines, samples)

# Show it as a grayscale image with a colorbar
fig, ax = plt.subplots()
pc_image = ax.imshow(first_pc_reshaped, cmap='gray')
fig.colorbar(pc_image, ax=ax)
ax.set_title('First Principal Component with Best PCA')
plt.show()