In [None]:
import numpy as np
import os
from spectral.io import envi
import pandas as pd
import rasterio
import matplotlib.pyplot as plt

In [None]:
def calculate_mean_spectrum(image_data):
    """Average an image cube over its first two axes.

    The first two axes are treated as the spatial dimensions, so for a
    3-D input the result is a 1-D array with one mean value per entry of
    the last axis (assumed to be the spectral bands -- TODO confirm the
    (rows, cols, bands) layout against the data producer).
    """
    spatial_axes = (0, 1)
    return np.mean(image_data, axis=spatial_axes)


def reshape_spectrum(mean_spectrum):
    """Return the spectrum as a (1, 1, n_bands) array.

    ``n_bands`` is taken from ``len(mean_spectrum)``, i.e. the size of the
    first axis of the array that is passed in.
    """
    n_bands = len(mean_spectrum)
    return mean_spectrum.reshape(1, 1, n_bands)


def save_spectrum(mean_spectrum, output_spec_path):
    """Write the mean spectrum to ``output_spec_path`` in NumPy ``.npy`` format.

    The spectrum is first brought into shape (1, 1, n_bands) via
    ``reshape_spectrum`` and that reshaped array is what gets stored.
    """
    np.save(output_spec_path, reshape_spectrum(mean_spectrum))


def create_output_structure(input_folder, output_base_folder, current_file):
    """Mirror the sub-folder of ``current_file`` under ``output_base_folder``.

    The directory part of ``current_file`` relative to ``input_folder`` is
    appended to ``output_base_folder``; the resulting folder is created if it
    does not exist yet (existing folders are left alone) and its path is
    returned.
    """
    # Directory of the file, expressed relative to the input root.
    mirrored_subdir = os.path.dirname(os.path.relpath(current_file, input_folder))

    target_dir = os.path.join(output_base_folder, mirrored_subdir)
    os.makedirs(target_dir, exist_ok=True)
    return target_dir


def process_hsi_images(input_folder, output_base_folder):
    """Walk ``input_folder`` and save one mean spectrum per ``.npy`` image.

    For every file whose name ends in ``.npy`` the array is loaded, averaged
    with ``calculate_mean_spectrum`` and written by ``save_spectrum`` to
    ``<stem>_mean_spectrum.npy`` inside a folder that mirrors the file's
    location under ``output_base_folder``. A progress line is printed per file.

    Parameters
    ----------
    input_folder : str
        Root of the tree that is searched recursively for ``.npy`` files.
    output_base_folder : str
        Root under which the mirrored folder structure and the result files
        are created.
    """
    suffix = '.npy'

    # Resolved once so the output tree can be recognised while walking.
    output_root = os.path.normcase(os.path.abspath(output_base_folder))

    for root, dirs, files in os.walk(input_folder):
        # If the output tree sits inside the input tree, do not descend into
        # it: otherwise freshly written *_mean_spectrum.npy files would be
        # picked up as inputs and averaged a second time.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d))) != output_root
        ]

        for file in files:
            if not file.endswith(suffix):
                continue

            npy_path = os.path.join(root, file)

            # Load the image cube and reduce it to its mean spectrum.
            image_data = np.load(npy_path)
            mean_spectrum = calculate_mean_spectrum(image_data)

            # Mirror the input sub-folder below the output root.
            output_folder = create_output_structure(input_folder, output_base_folder, npy_path)

            # Strip only the trailing extension; str.replace would also
            # rewrite any other '.npy' occurring earlier in the file name.
            stem = file[:-len(suffix)]
            output_spec_path = os.path.join(output_folder, f"{stem}_mean_spectrum.npy")

            save_spectrum(mean_spectrum, output_spec_path)

            print(f"Processed: {npy_path} -> {output_spec_path}")


# Driver for the mean-spectrum extraction step.
if __name__ == "__main__":

    # Placeholder locations: point these at the real input tree of .npy
    # images and at the folder that should receive the results.
    input_folder = '<input_directory_path>'
    output_base_folder = '<output_directory_path>'

    process_hsi_images(
        input_folder=input_folder,
        output_base_folder=output_base_folder,
    )

In [None]:
def reshape_files_in_directory(output_directory, n_bands=462):
    """
    Rewrite saved spectra in place from shape (1, 1, n_bands) to (1, n_bands).

    Every file ending in ``.npy`` below ``output_directory`` is loaded. Files
    whose array has the expected 3-D shape are reshaped and saved back to the
    same path; all other files are left untouched and reported as skipped.

    Parameters
    ----------
    output_directory : str
        Root of the tree that is searched recursively.
    n_bands : int or None, default 462
        Number of bands a file must have to be rewritten. The default keeps
        the original behaviour (only (1, 1, 462) arrays are touched). Pass
        ``None`` to reshape any (1, 1, n) array regardless of ``n``.
    """
    for root, _dirs, files in os.walk(output_directory):
        for file in files:
            if not file.endswith('.npy'):
                continue

            file_path = os.path.join(root, file)
            data = np.load(file_path)

            # Only arrays with two leading singleton axes (and, unless
            # n_bands is None, the requested band count) qualify.
            has_expected_shape = (
                data.ndim == 3
                and data.shape[:2] == (1, 1)
                and (n_bands is None or data.shape[2] == n_bands)
            )

            if has_expected_shape:
                # Drop one singleton axis and overwrite the original file.
                np.save(file_path, data.reshape(1, data.shape[2]))
                print(f"Reshaped: {file_path}")
            else:
                print(f"Skipped (unexpected shape): {file_path} (shape: {data.shape})")


# Driver for the in-place reshape step.
if __name__ == "__main__":

    # Placeholder: point this at the folder holding the saved mean spectra.
    output_directory = '<output_directory_path>'

    reshape_files_in_directory(output_directory=output_directory)


In [None]:
#Labels
# Placeholder: root of the tree that holds the saved .npy files.
output_directory = '<output_directory_path>'

# Map every .npy file path to its label, which is the name of the folder
# that directly contains the file.
labels = {}

for root, _dirs, files in os.walk(output_directory):

    # All files in this directory share the same label.
    folder_name = os.path.basename(root)

    for file in files:

        if file.endswith('.npy'):
            # The label depends only on the path, so the array itself is
            # not loaded here (loading every file just to discard it was
            # wasted I/O).
            labels[os.path.join(root, file)] = folder_name


# Print all labels
print("Labels assigned based on folder names:")

for npy_file, label in labels.items():

    print(f"{npy_file}: {label}")

In [None]:
# Show the raw file-path -> folder-name mapping held in `labels`.
print(labels)

In [None]:
def load_and_concatenate_npy_files(root_dir, label_from='filename'):
    """Collect every ``.npy`` array below ``root_dir`` into one labelled table.

    Each file contributes one row: its array is flattened to 1-D and its
    values become the row's columns. A final ``'label'`` column holds either
    the file name without extension or the name of the containing folder.
    Directories and files are visited in sorted order so the row order is
    the same on every run and every filesystem.

    Parameters
    ----------
    root_dir : str
        Root of the tree that is searched recursively.
    label_from : {'filename', 'folder'}, default 'filename'
        Where the label of each row is taken from.

    Returns
    -------
    pandas.DataFrame
        One row per ``.npy`` file plus the ``'label'`` column. If no file is
        found the frame has no rows.

    Raises
    ------
    ValueError
        If ``label_from`` is neither ``'filename'`` nor ``'folder'``.
    """
    # Validate before touching the filesystem: previously the check ran per
    # file inside the loop, so a bad value went unnoticed for an empty tree
    # and otherwise only surfaced after the first file had been loaded.
    if label_from not in ('filename', 'folder'):
        raise ValueError("label_from must be either 'filename' or 'folder'")

    data = []
    labels = []

    for root, dirs, files in os.walk(root_dir):

        # os.walk yields entries in arbitrary order; sorting `dirs` in place
        # fixes the traversal order, sorting `files` fixes the order within
        # a directory.
        dirs.sort()

        for file in sorted(files):

            if not file.endswith(".npy"):
                continue

            npy_data = np.load(os.path.join(root, file))

            # One row per file: collapse any shape (including 0-d) to 1-D.
            data.append(npy_data.ravel())

            if label_from == 'filename':
                labels.append(os.path.splitext(file)[0])
            else:
                labels.append(os.path.basename(root))

    # Convert the data and labels into a pandas DataFrame
    df = pd.DataFrame(data)
    df['label'] = labels

    return df

# Placeholder: root of the tree that holds the per-image .npy spectra.
root_directory = '<output_directory_path>'

# Where each row's label comes from: 'folder' (name of the containing
# folder) or 'filename' (file name without extension).
label_source = 'folder'

# Build one table with a row per .npy file and a trailing 'label' column.
df = load_and_concatenate_npy_files(root_dir=root_directory, label_from=label_source)

# Export 1: plain CSV without the index column.
df.to_csv("Blueberry_SM.csv", index=False)

# Export 2: the same table as a NumPy array. The string 'label' column sits
# next to the spectral values, so (assuming the spectra are numeric) the
# array has object dtype, is stored via pickle, and must be read back with
# allow_pickle=True.
np.save("Blueberry_SM.npy", df.to_numpy())

In [None]:
# Read the exported array back. allow_pickle=True is needed for arrays of
# object dtype (e.g. numeric columns stored together with string labels).
# NOTE(security): unpickling can execute arbitrary code, so only load files
# that were produced by this notebook or another trusted source.
data = np.load('Blueberry_SM.npy', allow_pickle=True)

# Contents first, then the (rows, columns) shape.
print(data)
print(data.shape)