In [None]:
import os
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
from skimage.transform import resize
import random
import shutil

In [None]:
def visualize_random_hsi(directory, num_images=4, figsize=(15, 10), band_axis=2):
    """Show a grid of randomly chosen ``.npy`` cubes as band-averaged images.

    Walks ``directory`` recursively, collects every file whose name ends in
    ``.npy``, picks up to ``num_images`` of them at random and draws each one
    as a 2-D image obtained by averaging the loaded array along ``band_axis``.
    Files that are not 3-D arrays, or that fail to load or plot, are reported
    with ``print`` and their panel is left blank.

    Args:
        directory: Root folder searched recursively for ``.npy`` files.
        num_images: Maximum number of cubes to display; must be at least 1.
        figsize: Figure size forwarded to ``plt.subplots``.
        band_axis: Axis of the 3-D array that is averaged away. The default
            of 2 assumes the bands sit on the last axis; pass a different
            axis if the cubes are stored in another dimension order.

    Returns:
        None. Prints a message and returns early when no ``.npy`` file exists.

    Raises:
        ValueError: If ``num_images`` is smaller than 1.
    """
    # Reject a non-positive count up front; it used to surface later as an
    # unrelated ZeroDivisionError while computing the grid shape.
    if num_images < 1:
        raise ValueError(f"num_images must be >= 1, got {num_images}")

    # Find all .npy files in directory
    hsi_files = [
        os.path.join(root, name)
        for root, _, names in os.walk(directory)
        for name in names
        if name.endswith('.npy')
    ]

    if not hsi_files:
        print("No .npy files found in directory!")
        return

    # Select random files (never more than are available)
    selected_files = random.sample(hsi_files, min(num_images, len(hsi_files)))

    # Size the grid from the files actually selected, not from the requested
    # count, so a small dataset is not drawn in a mostly empty grid.
    n_panels = len(selected_files)
    rows = int(np.sqrt(n_panels))
    cols = int(np.ceil(n_panels / rows))

    # squeeze=False always returns a 2-D array of axes, so the 1x1 case needs
    # no special handling.
    _, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
    axes = axes.ravel()

    # Blank every panel first: unused panels and panels whose file is skipped
    # or fails below then stay empty instead of showing a bare frame.
    for ax in axes:
        ax.axis('off')

    for ax, file_path in zip(axes, selected_files):
        try:
            # Load HSI cube
            hsi = np.load(file_path)

            if hsi.ndim != 3:
                print(f"Skipping {file_path}: Not a 3D array")
                continue

            # Create pseudo-color image (mean across bands)
            img = np.mean(hsi, axis=band_axis)

            # Plot with Viridis colormap
            ax.imshow(img, cmap='viridis')
            ax.set_title(os.path.basename(file_path))

        except Exception as e:
            # Best effort: one unreadable file must not abort the whole figure.
            print(f"Error visualizing {file_path}: {e}")

    plt.tight_layout()
    plt.show()

if __name__ == "__main__":
    # Folder whose .npy cubes are previewed.
    # NOTE(review): the same path is assigned to split_output in the
    # dataset-split cell further down, so on a fresh kernel this folder may
    # not have been populated yet when this cell runs -- confirm cell order.
    output_directory = '<path_to_file>/Individual_Cubes_Label'

    # Preview four randomly chosen cubes with figsize (15, 10).
    visualize_random_hsi(output_directory, num_images=4, figsize=(15, 10))

In [None]:
def split_dataset(source_dir, output_dir, train_ratio=0.70, val_ratio=0, test_ratio=0.30, seed=None):
    """Copy a directory tree into ``train``/``val``/``test`` subsets.

    Every directory under ``source_dir`` that contains files is split
    independently: its file names are shuffled, the first
    ``int(n * train_ratio)`` go to ``train``, the next ``int(n * val_ratio)``
    go to ``val`` and all remaining files go to ``test``. Files are copied
    with ``shutil.copy2`` and the relative folder layout is recreated under
    each subset directory.

    Args:
        source_dir: Root of the dataset to split.
        output_dir: Folder that receives the ``train``, ``val`` and ``test``
            subdirectories (created if missing).
        train_ratio: Fraction of each folder's files copied to ``train``.
        val_ratio: Fraction of each folder's files copied to ``val``.
        test_ratio: Only used in the sum check; ``test`` receives whatever is
            left after the train and val counts are truncated to integers.
        seed: Optional seed for a reproducible split. When ``None`` the
            module-level ``random`` generator is used.

    Returns:
        None. Prints a message and returns early if ``source_dir`` is missing.

    Raises:
        AssertionError: If the three ratios do not sum to 1.
    """
    if not os.path.exists(source_dir):
        print(f"Source directory '{source_dir}' does not exist.")
        return

    # Ensure ratios sum up to 1. Compare with a tolerance: an exact float
    # comparison rejects valid inputs such as 0.7 + 0.2 + 0.1, which
    # evaluates to 0.9999999999999999.
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "Ratios must sum up to 1"

    # A dedicated generator keeps a seeded split independent of global state.
    rng = random.Random(seed) if seed is not None else random

    # Define and create the output subdirectories
    subset_dirs = {
        subset: os.path.join(output_dir, subset)
        for subset in ("train", "val", "test")
    }
    for folder in subset_dirs.values():
        os.makedirs(folder, exist_ok=True)

    # Normalised paths of the subset folders, used to keep the walk out of them.
    subset_paths = {
        os.path.normcase(os.path.abspath(folder))
        for folder in subset_dirs.values()
    }

    # Walk through dataset and split files
    for root, dirs, files in os.walk(source_dir):
        # If output_dir lies inside (or equals) source_dir, do not descend
        # into the subset folders, otherwise the copies would be re-split.
        # Pruning must be in place for os.walk to honour it.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d))) not in subset_paths
        ]

        if not files:
            continue  # Skip directories without files

        # Relative path from source directory
        rel_path = os.path.relpath(root, source_dir)

        # Create corresponding folders in train, val, test
        for folder in subset_dirs.values():
            os.makedirs(os.path.join(folder, rel_path), exist_ok=True)

        # Sort first so the shuffle starts from a filesystem-independent
        # order; together with ``seed`` this makes the split reproducible.
        names = sorted(files)
        rng.shuffle(names)

        train_end = int(len(names) * train_ratio)
        val_end = train_end + int(len(names) * val_ratio)

        assignments = {
            "train": names[:train_end],
            "val": names[train_end:val_end],
            "test": names[val_end:],
        }

        # Copy files to respective directories
        for subset, subset_files in assignments.items():
            for name in subset_files:
                shutil.copy2(
                    os.path.join(root, name),
                    os.path.join(subset_dirs[subset], rel_path, name),
                )

    print(f"Dataset split complete. Check '{output_dir}' for the split dataset.")


# Input tree to split, and the folder that receives the train/val/test copies.
source_dataset = '<path_to_file>/Individual_Cubes'
split_output = '<path_to_file>/Individual_Cubes_Label'

# Run with the function's default ratios (0.70 train / 0 val / 0.30 test).
split_dataset(source_dir=source_dataset, output_dir=split_output)