In [None]:
# Resampling for the relevant 20m spatial resolution bands (B5, B6, B7, B8a, B11, and B12).
# 10m bands (B2, B3, B4, and B8) will be used as is, 20m bands will be resampled to 10m resolution using the average method.
# The 60m resolution bands are not included in the extracted spectra.

In [None]:
import rasterio
import pandas as pd
import xml.etree.ElementTree as ET

# Normalised identifiers (see _band_key) of the 60 m Sentinel-2 bands
# (B1, B9, B10); these are left out of the extracted spectra.
_EXCLUDED_60M_BANDS = frozenset({"1", "9", "10"})


def _band_key(band_name):
    """Normalise a band name such as "B02", "b2" or "B8A" to "2" / "8A"."""
    key = band_name.strip().upper()
    if key.startswith("B"):
        key = key[1:]
    return key.lstrip("0")


def _read_bands_info(xml_file):
    """Return ``[(band_name, central_wavelength), ...]`` in document order."""
    root = ET.parse(xml_file).getroot()
    bands_info = []
    for elem in root.iterfind(".//BANDS/BAND"):
        name_elem = elem.find("BAND_NAME")
        wavelength_elem = elem.find("CENTRAL_WAVELENGTH")
        # find() returns None for a missing child; fail with a clear message
        # instead of an AttributeError on ``.text``.
        if name_elem is None or wavelength_elem is None:
            raise ValueError(
                "BAND element is missing BAND_NAME or CENTRAL_WAVELENGTH"
            )
        band_name = (name_elem.text or "").strip()
        bands_info.append((band_name, float(wavelength_elem.text)))
    return bands_info


def extract_spectra(xml_file, geotiff_file):
    """Build a per-band spectra table from a band-description XML and a GeoTIFF.

    The XML is searched for ``.//BANDS/BAND`` elements, each providing a
    ``BAND_NAME`` and a ``CENTRAL_WAVELENGTH``. The i-th BAND element is taken
    to describe the i-th band of the GeoTIFF, so both must list the same
    number of bands. The 60 m bands (B1, B9, B10) are skipped.

    The resulting table has one row per retained band and one ``Sample_<k>``
    column per pixel (pixels flattened in row-major order), followed by a
    ``Wavelength (micrometers)`` column. It is printed and written to
    ``spectra_table.csv`` in the current working directory (without the
    band-name index).

    No per-band resampling is performed: every band of a single GeoTIFF
    dataset is read on that dataset's one pixel grid.
    NOTE(review): if the 20 m bands are stored in separate files they have to
    be resampled while reading (rasterio ``read(out_shape=...,
    resampling=...)``); that case is not handled here.
    NOTE(review): the wavelength is stored exactly as given in the XML;
    confirm its unit matches the "micrometers" column header.

    Args:
        xml_file: Path to the XML file holding the band descriptions.
        geotiff_file: Path to the multi-band GeoTIFF.

    Returns:
        The spectra ``pandas.DataFrame`` on success, or ``None`` if any step
        failed (the error is printed, not raised).
    """
    try:
        bands_info = _read_bands_info(xml_file)
        if not bands_info:
            raise ValueError(f"no .//BANDS/BAND entries found in {xml_file}")

        # Positions (0-based) of the bands to keep, i.e. everything except
        # the 60 m bands.
        keep = [
            position
            for position, (band_name, _) in enumerate(bands_info)
            if _band_key(band_name) not in _EXCLUDED_60M_BANDS
        ]
        if not keep:
            raise ValueError("no 10 m / 20 m bands left after filtering")

        with rasterio.open(geotiff_file) as src:
            if src.count != len(bands_info):
                raise ValueError(
                    f"XML describes {len(bands_info)} bands but "
                    f"{geotiff_file} contains {src.count}"
                )
            # rasterio band indexes are 1-based; a list of indexes yields a
            # 3-D array shaped (bands, rows, cols).
            cube = src.read([position + 1 for position in keep])

        # One row per band, one column per pixel.
        pixels = cube.reshape(cube.shape[0], -1)

        df = pd.DataFrame(
            pixels,
            index=[bands_info[position][0] for position in keep],
            columns=[f'Sample_{i}' for i in range(pixels.shape[1])],
        )

        # Add the center wavelength information to the dataframe
        df['Wavelength (micrometers)'] = [
            bands_info[position][1] for position in keep
        ]

        # Display the dataframe (Optional: comment out if you have many samples)
        print(df)

        # Save the dataframe to a CSV file
        df.to_csv('spectra_table.csv', index=False)
        return df

    except Exception as e:
        # Notebook-style boundary: report the problem instead of raising.
        print(f"Error: {e}")
        return None

if __name__ == "__main__":
    # Placeholder input locations; point these at a real band-description XML
    # and its matching GeoTIFF before running the script.
    sentinel2_xml_file, sentinel2_geotiff_file = (
        "path/to/sentinel2.xml",
        "path/to/sentinel2_geotiff.tif",
    )
    extract_spectra(
        xml_file=sentinel2_xml_file,
        geotiff_file=sentinel2_geotiff_file,
    )
