In [None]:
import rioxarray as rxr     # rasterio with xarray support
import xarray as xr         # n-dimensional data
import xvec                 # vector data
import pandas as pd         # data manipulation
import geopandas as gpd     # geospatial data manipulation

import numpy as np          # numerical operations
import seaborn as sns       # statistical data visualization
import matplotlib.pyplot as plt # plotting 


In [None]:
# Path of the multi-band GeoTIFF read in this cell (IMAD stack)
tiff_file_imad = 'data/imad20Itnew_rescale_SWIR.tif' 

# Open the raster with rioxarray (masked=True requests nodata masking) and
# drop any length-1 dimensions
rds_imad = rxr.open_rasterio(tiff_file_imad, masked=True).squeeze()
# divide bands into separate variables
# (each selection uses a list of band labels, so the band dimension is kept)
landsat9 = rds_imad.sel(band = [1, 2, 3, 4, 5, 6])
landsat5 = rds_imad.sel(band = [7, 8, 9, 10, 11, 12])
iamd = rds_imad.sel(band = [13, 14, 15, 16, 17, 18])
nc_pix = rds_imad.sel(band = [19])
srtm_sl_a = rds_imad.sel(band = [20,21,22])

# Report the size of the first dimension of every group
# (presumably the band dimension - TODO confirm the dimension order)
print(f'Dataset consists of {landsat9.shape[0]} bands of Landsat 9, {landsat5.shape[0]} bands of Landsat 5, {iamd.shape[0]} bands of IAMD, \n{nc_pix.shape[0]} band of NC_Pixels and {srtm_sl_a.shape[0]} bands of SRTM - height, slope, aspect.')


In [None]:
# Path of the multi-band GeoTIFF read in this cell (PIF stack)
tiff_file_pif = 'data/PIF_EMD_10_rescale.tif' # PIF_SID.tif

# Open the raster with rioxarray (masked=True requests nodata masking) and
# drop any length-1 dimensions
rds_pif = rxr.open_rasterio(tiff_file_pif, masked=True).squeeze()

# Split the band stack into named groups. NOTE: landsat9, landsat5, nc_pix and
# srtm_sl_a deliberately keep the names used in the IMAD cell and therefore
# replace those selections.
landsat9 = rds_pif.sel(band = [1, 2, 3, 4, 5, 6])
landsat5 = rds_pif.sel(band = [7, 8, 9, 10, 11, 12])
pif = rds_pif.sel(band = [13, 14, 15, 16, 17, 18])
nc_pix = rds_pif.sel(band = [19])
srtm_sl_a = rds_pif.sel(band = [20, 21, 22])

# Report the PIF selection made above. The previous version printed the size of
# `iamd` from the IMAD cell, which described the wrong dataset and raised a
# NameError when this cell ran on its own.
print(f'Dataset consists of {landsat9.shape[0]} bands of Landsat 9, {landsat5.shape[0]} bands of Landsat 5, {pif.shape[0]} bands of PIF, \n{nc_pix.shape[0]} band of NC_Pixels and {srtm_sl_a.shape[0]} bands of SRTM - height, slope, aspect.')

In [None]:
# load vector data
gdf = gpd.read_file('data/SHP/invar_p.shp')
# check nan values (count of missing values per column)
print(f'count of NaN values for vecotr data: \n{gdf.isna().sum()}')

# reproject gdf to the same crs as the IMAD raster
gdf_imad = gdf.to_crs(rds_imad.rio.crs)

# reproject gdf to the crs of the PIF raster (SID, SAM, EMD)
gdf_pif = gdf.to_crs(rds_pif.rio.crs)

# check if the vector data is in the same crs as raster (both lines should print True)
print(gdf_imad.crs == rds_imad.rio.crs)
print(gdf_pif.crs == rds_pif.rio.crs)

In [None]:
# Measurement labels in raster band order (band 1 first) for both cubes.
# Bands 1-6: Landsat 9, 7-12: Landsat 5, 13-18: normalized bands, 19: mask/distance band.
imad_labels = ['l9b', 'l9g', 'l9r', 'l9n', 'l9s1', 'l9s2', 'l5b', 'l5g', 'l5r', 'l5n', 'l5s1', 'l5s2', 'ib',  'ig',  'ir',  'in',  'is1',  'is2', "invar_pix"]
pif_labels = ['l9b', 'l9g', 'l9r', 'l9n', 'l9s1', 'l9s2', 'l5b', 'l5g', 'l5r', 'l5n', 'l5s1', 'l5s2', 'pb',  'pg',  'pr',  'pn',  'ps1', 'ps2', 'distance']


def build_measurement_cube(rds, labels):
    '''
    Stack the first len(labels) bands of a raster along a new 'measurement' dimension.

    parameters:
    -----------
    rds: raster DataArray with a 'band' coordinate labelled 1, 2, 3, ...
    labels: one measurement name per band, in band order

    returns:
    --------
    DataArray made of the single-band selections rds.sel(band=1), rds.sel(band=2), ...
    concatenated along the new dimension 'measurement' (indexed by labels).
    Each layer is selected separately, so the original band number stays
    available as the 'band' coordinate of the cube.
    '''
    layers = [rds.sel(band=band_no) for band_no in range(1, len(labels) + 1)]
    return xr.concat(layers, dim=pd.Index(labels, name='measurement'))


# create a cube with all imad bands
imad_cube = build_measurement_cube(rds_imad, imad_labels)

# create a cube with all PIF (sid,sam,emd) bands
pif_cube = build_measurement_cube(rds_pif, pif_labels)
pif_cube

In [None]:
# extract raster values to points: once for imad cube and once for pif cube
# ('spatial_ref' is dropped from the cube before the extraction)
vector_imad_cube = imad_cube.drop_vars('spatial_ref').xvec.extract_points(
    points=gdf_imad.geometry,
    x_coords='x',
    y_coords='y',
)
# NOTE(review): bare expression in the middle of the cell - it is not the last
# statement, so Jupyter displays nothing for it
vector_imad_cube

# same extraction for the PIF cube, using the points reprojected to the PIF raster crs
vector_pif_cube = pif_cube.drop_vars('spatial_ref').xvec.extract_points(
    points=gdf_pif.geometry,
    x_coords='x',
    y_coords='y',
)

In [None]:
#IRMAD: convert extracted raster values to geopandas dataframe
gdf_raster_imad = vector_imad_cube.xvec.to_geopandas()
print(gdf_raster_imad.shape)
# NOTE(review): not the last statement of the cell, so nothing is displayed here
gdf_raster_imad.head()
# sjoin: attach the 'type' attribute of the intersecting vector feature to every extracted point
gdf_vec_imad = gdf_raster_imad.sjoin(gdf_imad[['geometry', 'type']], how='left', predicate='intersects')
# drop index_right column (added by sjoin)
gdf_vec_imad.drop(columns='index_right', inplace=True)

# multiply the Landsat 9, Landsat 5 and IRMAD band columns by 100
# (invar_pix is not in the list and stays unscaled)
cols_to_scale = ['l9b', 'l9g', 'l9r', 'l9n', 'l9s1', 'l9s2', 'l5b', 'l5g', 'l5r', 'l5n', 'l5s1', 'l5s2', 'ib',  'ig',  'ir',  'in',  'is1',  'is2']      
gdf_vec_imad[cols_to_scale] *= 100 

# PIF:
gdf_raster_pif = vector_pif_cube.xvec.to_geopandas()
print(gdf_raster_pif.shape)
# NOTE(review): not the last statement of the cell, so nothing is displayed here
gdf_raster_pif.head()
# sjoin: attach the 'type' attribute of the intersecting vector feature to every extracted point
gdf_vector = gdf_raster_pif.sjoin(gdf_pif[['geometry', 'type']], how='left', predicate='intersects') 
# drop index_right column (added by sjoin)
gdf_vector.drop(columns='index_right', inplace=True)
# NOTE(review): unlike gdf_vec_imad, the PIF columns are not multiplied by 100 here - confirm this is intended
# NOTE(review): the result of dropna() is only displayed, gdf_vector itself keeps its NaN rows
gdf_vector.dropna()


In [None]:
# EXTRACT invar pix from imad_cube
imad_df = imad_cube.drop_vars('spatial_ref')

# convert xarray.Dataarray to pandas.DataFrame:
# one row per pixel, one column per measurement label under the top-level key 'measurement'
imad_df = imad_df.to_dataframe(name='measurement').unstack(level='measurement').reset_index()
# drop every row that contains a NaN
imad_df = imad_df.dropna()
# keep only the rows whose invar_pix value is non-zero
imad_df_inv = imad_df[imad_df['measurement']['invar_pix'] != 0]

# multiply all l5, l9 and i * 100 - for better visualization
inner_cols = ['l9b', 'l9g', 'l9r', 'l9n', 'l9s1', 'l9s2',
              'l5b', 'l5g', 'l5r', 'l5n', 'l5s1', 'l5s2',
              'ib',  'ig',  'ir',  'in',  'is1',  'is2']

# work on a copy so that imad_df_inv keeps the unscaled values
imad_df = imad_df_inv.copy()

# IndexSlice addresses the second column level below 'measurement'
idx = pd.IndexSlice
imad_df.loc[:, idx['measurement', inner_cols]] *= 100
# print(imad_df_inv) - if you want to see the difference
imad_df

In [None]:
# EXTRACT invar pix from  pif_cube
pif_df = pif_cube.drop_vars('spatial_ref')

# convert xarray.Dataarray to pandas.DataFrame:
# one row per pixel, one column per measurement label under the top-level key 'measurement'
pif_df = pif_df.to_dataframe(name='measurement').unstack(level='measurement').reset_index()
# drop every row that contains a NaN
pif_df.dropna(inplace=True)
# drop all 0 values - just PIF pixels (rows whose 'distance' value is non-zero)
pif_df_inv = pif_df[pif_df['measurement']['distance'] != 0]

# multiply all l5, l9 and p * 100
inner_cols = ['l9b', 'l9g', 'l9r', 'l9n', 'l9s1', 'l9s2',
              'l5b', 'l5g', 'l5r', 'l5n', 'l5s1', 'l5s2',
              'pb',  'pg',  'pr',  'pn',  'ps1', 'ps2']

# work on a copy so that pif_df_inv keeps the unscaled values
pif_df = pif_df_inv.copy()

# IndexSlice addresses the second column level below 'measurement'
idx = pd.IndexSlice
pif_df.loc[:, idx['measurement', inner_cols]] *= 100
pif_df


In [None]:
# load csv file with coefficients (first csv column becomes the index)
coefficients_df_imad = pd.read_csv('data/coeffs/IMAD_coeffs_20_rescale_SWIR_POSLEDNI.csv', index_col=0) # l5l9: IMAD_coeffs_1_rescale_SWIR_POSLEDNI.csv

coefficients_df_pif = pd.read_csv('data/coeffs/PIF_coeffs_EMD_10.csv', index_col=0) # PIF_coeffs_SID, PIF_coeffs_SAM, PIF_coeffs_EMD

# scale only the 'Offset' column by 100 on a copy; the loaded frame stays untouched
cols_to_scale = ['Offset']      
coeff_scaled = coefficients_df_imad.copy()
coeff_scaled[cols_to_scale] *= 100 
# NOTE(review): not the last statement of the cell, so nothing is displayed for it
coefficients_df_imad
coeff_scaled

In [None]:
# scale both PIF offset columns by 100 on a copy; the loaded frame stays untouched
cols_to_scale_pif = ['offset_before', 'offset_after']      
coeff_scaled_pif = coefficients_df_pif.copy()
coeff_scaled_pif[cols_to_scale_pif] *= 100 
coeff_scaled_pif

### SCATTER PLOTS

In [None]:
from sklearn.metrics import r2_score


def plot_irmad(
    df, band_l5, band_l9, band_irmad,
    a2, b2,
    band_label='Blue',
    x_min=0, x_max=1, ax_step=0.01
):
    '''
    Plots the IRMAD scatter plot of one band with a regression line and R², then shows it.
    parameters:
    -----------
    df: DataFrame with two-level columns; band values are read from df['measurement'] (e.g. imad_df)
    band_l5: Landsat 5 column name (e.g. "l5b")
    band_l9: Landsat 9 column name (e.g. "l9b")
    band_irmad: IRMAD-normalized column name (e.g. "ib")
    a2, b2: slope and offset of the normalization; shown in the title and
            combined with the fitted L5 -> L9 line to draw the orange line
    band_label: band name used in labels and titles (e.g. "Blue")
    x_min, x_max: common range of the x and y axes
    ax_step: step for x and y ticks

    explanation:
    -----------
    - Black dots: scatter L5 x L9 (before normalization)
    - Red dots: scatter IRMAD x L9
    - Orange line: y = a2 * (a1 * x + b1) + b2, where a1, b1 come from a
      least-squares fit of L9 against L5
    - Gray line: ideal y = x
    - R²: r2_score of the L9 values against the IRMAD values used as
      predictions, i.e. agreement with the line y = x

    returns:
    --------
    None; the figure is displayed with plt.show().
    '''

    # data
    measurements = df['measurement']
    x1 = measurements[band_l5].values
    y1 = measurements[band_l9].values
    x2 = measurements[band_irmad].values
    y2 = y1

    # masking NaN / infinite values, separately for each pair of series
    mask1 = np.isfinite(x1) & np.isfinite(y1)
    mask2 = np.isfinite(x2) & np.isfinite(y2)
    x1, y1 = x1[mask1], y1[mask1]
    x2, y2 = x2[mask2], y2[mask2]

    # fit linear regression L5 -> L9
    a1, b1 = np.polyfit(x1, y1, 1)

    # compose the fitted line with the supplied IMAD coefficients;
    # used to check whether the IMAD data lies on or around the line
    a_new = a2 * a1
    b_new = a2 * b1 + b2

    # R² of the normalized values against L9 (prediction = normalized value)
    r2 = r2_score(y2, x2)

    xs = np.linspace(x_min, x_max, 200) # x values for the plotted lines
    y_comp = a_new * xs + b_new

    # Ticks from x_min to x_max inclusive. Half a step of head-room keeps the
    # end point despite float rounding without ever adding a tick beyond x_max
    # (the former "x_max + 1" stop overshot for steps smaller than 1).
    ticks = np.arange(x_min, x_max + ax_step / 2, ax_step)

    # One figure only: previously a second, empty figure was created right
    # before tight_layout()/show(), so the layout was applied to the empty
    # figure and a blank figure was displayed as well.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(x1, y1, s=0.5, c='black', alpha=0.7, label='L5 x L9')    # points L5 vs L9
    ax.scatter(x2, y2, s=0.5, c='red',   alpha=0.5, label='IRMAD x L9') # points IRMAD vs L9

    ax.plot(xs, y_comp, color='orange', label='regression line for invariant pixels (IRMAD)')
    ax.plot(xs, xs, color='gray', linestyle='--', label='ideal: y=x') # ideal diagonal

    ax.set_xlabel(f'Landsat 5 / IRMAD {band_label} band')
    ax.set_ylabel(f'Landsat 9 {band_label} band')
    fig.suptitle(f'Scatter plot of {band_label}-band surface reflectance [%] for invariant pixels (IRMAD - 15 it.)', fontsize=14)
    # signed offset so the equation reads "y=1.023x +0.512" / "y=1.023x -0.512"
    ax.set_title(f'y={a2:.3f}x {b2:+.3f}, R²={r2:.3f}\n', fontsize=12) # print scale, offset and R²
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    # limits are set after the ticks so that they are final
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(x_min, x_max)
    ax.legend(fontsize=10, loc='lower right')
    ax.grid(True)
    fig.tight_layout()
    plt.show()

def plot_pif(
    df, band_l5, band_l9, band_pif,
    a2, b2,
    band_label='Blue',
    x_min=7200, x_max=15500, ax_step=1000
):
    '''
    Plots the PIF scatter plot of one band with a regression line and R², then shows it.
    parameters:
    -----------
    df: DataFrame with two-level columns; band values are read from df['measurement'] (e.g. pif_df)
    band_l5: Landsat 5 column name (e.g. "l5b")
    band_l9: Landsat 9 column name (e.g. "l9b")
    band_pif: PIF-normalized column name (e.g. "pb")
    a2, b2: slope and offset of the orange line y = a2 * x + b2 (also shown in the title)
    band_label: band name used in labels and titles (e.g. "Blue")
    x_min, x_max: common range of the x and y axes
    ax_step: step for x and y ticks

    explanation:
    -----------
    - Black dots: scatter L5 x L9 (before normalization)
    - Red dots: scatter normalized values x L9
    - Orange line: y = a2 * x + b2
    - Gray line: ideal y = x
    - R²: r2_score of the L9 values against the normalized values used as
      predictions, i.e. agreement with the line y = x

    The method name in the labels and titles is fixed to "SAM" / "SAM_10".

    returns:
    --------
    None; the figure is displayed with plt.show().
    '''
    # Data
    measurements = df['measurement']
    x_l5 = measurements[band_l5].values
    x_pif = measurements[band_pif].values
    y_l9 = measurements[band_l9].values

    # keep only pairs where both the normalized and the L9 value are finite
    mask2 = np.isfinite(x_pif) & np.isfinite(y_l9)
    x2, y2 = x_pif[mask2], y_l9[mask2]

    # R² of the normalized values against L9 (prediction = normalized value)
    r2_2 = r2_score(y2, x2)

    x_vals = np.linspace(x_min, x_max, 100)
    y2_line = a2 * x_vals + b2

    # Ticks from x_min to x_max inclusive. Half a step of head-room keeps the
    # end point despite float rounding without ever adding a tick beyond x_max
    # (the former "x_max + 1" stop overshot for steps smaller than 1).
    ticks = np.arange(x_min, x_max + ax_step / 2, ax_step)

    # One figure only: previously a second, empty figure was created right
    # before tight_layout()/show(), so the layout was applied to the empty
    # figure and a blank figure was displayed as well.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(x_l5, y_l9, s=0.5, c='black', label='L5 x L9')       # points L5 vs L9
    ax.scatter(x2, y2, s=0.5, c='red', alpha=0.5, label='SAM x L9') # points normalized vs L9

    ax.plot(x_vals, y2_line, color='orange', label='regression for invariant pixels (SAM)')
    ax.plot(x_vals, x_vals, color='grey', linestyle='--', label='ideal: y=x')

    ax.set_xlabel(f'Landsat 5 / SAM {band_label} band')
    ax.set_ylabel(f'Landsat 9 {band_label} band')
    fig.suptitle(f'Scatter plot of {band_label}-band surface reflectance [%] for invariant pixels (SAM_10)', fontsize=14)
    # signed offset so a negative offset is not rendered as "+ -0.512"
    ax.set_title(f'y={a2:.3f}x {b2:+.3f}, R²={r2_2:.3f}\n', fontsize=12) # print scale, offset and R²
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    # limits are set after the ticks so that they are final
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(x_min, x_max)
    ax.legend(fontsize=10)
    ax.grid(True)
    fig.tight_layout()
    plt.show()

In [None]:
# PIF scatter plots: one figure per band, all drawn from pif_df.
# Each entry: (L5 column, L9 column, PIF column, coefficient row, band label, upper axis limit)
_pif_plot_specs = (
    ('l5b',  'l9b',  'pb',  'SR_B2', 'BLUE',  30),
    ('l5g',  'l9g',  'pg',  'SR_B3', 'GREEN', 30),
    ('l5r',  'l9r',  'pr',  'SR_B4', 'RED',   30),
    ('l5n',  'l9n',  'pn',  'SR_B5', 'NIR',   60),
    ('l5s1', 'l9s1', 'ps1', 'SR_B6', 'SWIR1', 50),
    ('l5s2', 'l9s2', 'ps2', 'SR_B7', 'SWIR2', 50),
)

for _l5_col, _l9_col, _pif_col, _coeff_row, _band_name, _upper in _pif_plot_specs:
    plot_pif(
        df=pif_df,
        band_l5=_l5_col,
        band_l9=_l9_col,
        band_pif=_pif_col,
        # scale and offset after normalization, taken from the scaled PIF coefficients
        a2=coeff_scaled_pif.loc[_coeff_row, 'scale_after'],
        b2=coeff_scaled_pif.loc[_coeff_row, 'offset_after'],
        band_label=_band_name,
        x_min=0, x_max=_upper, ax_step=10
    )

In [None]:
# IRMAD scatter plots: one figure per band, all drawn from imad_df.
# Each entry: (L5 column, L9 column, IRMAD column, coefficient row, band label, upper axis limit, tick step)
_irmad_plot_specs = (
    ("l5b",  "l9b",  "ib",  "SR_B2", "BLUE",  15, 5),
    ("l5g",  "l9g",  "ig",  "SR_B3", "GREEN", 20, 5),
    ("l5r",  "l9r",  "ir",  "SR_B4", "RED",   25, 5),
    ("l5n",  "l9n",  "in",  "SR_B5", "NIR",   70, 10),
    ("l5s1", "l9s1", "is1", "SR_B6", "SWIR1", 50, 10),
    ("l5s2", "l9s2", "is2", "SR_B7", "SWIR2", 50, 10),
)

for _l5_col, _l9_col, _irmad_col, _coeff_row, _band_name, _upper, _tick in _irmad_plot_specs:
    # slope and (scaled) offset of this band from the IMAD coefficient table
    _band_coeffs = coeff_scaled.loc[_coeff_row]
    plot_irmad(
        df=imad_df,
        band_l5=_l5_col,
        band_l9=_l9_col,
        band_irmad=_irmad_col,
        a2=_band_coeffs["Slope"],
        b2=_band_coeffs["Offset"],
        band_label=_band_name,
        x_min=0, x_max=_upper, ax_step=_tick
    )

### STATISTICAL METRICS

In [None]:
# prepare IMAD no change pixels to compute statistical metrics
# drop unnecessary columns (everything under the top-level key 'band')
imad_df_inv = imad_df_inv.drop(['band'], axis=1)

# Flat columns - remove multidimensional index in columns.
# Columns created by reset_index() (the pixel coordinates) have an empty second
# level; fall back to the first level for them so they keep their names
# instead of all collapsing into columns named ''.
imad_df_inv.columns = [(col[1] or col[0]) if isinstance(col, tuple) else col for col in imad_df_inv.columns]

# keep rows without any 0 and without any NaN; chaining returns a new frame
# instead of modifying a filtered slice in place
imad_df_inv = imad_df_inv[(imad_df_inv != 0).all(axis=1)].dropna()

# prepare PIF no change pixels for computing metrics
# drop unnecessary columns (everything under the top-level key 'band')
pif_df_inv = pif_df_inv.drop(['band'], axis=1)

# Flat columns - same rule as above
pif_df_inv.columns = [(col[1] or col[0]) if isinstance(col, tuple) else col for col in pif_df_inv.columns]

# keep rows without any 0 and without any NaN
pif_df_inv = pif_df_inv[(pif_df_inv != 0).all(axis=1)].dropna()

#pif_df_inv
imad_df_inv

In [None]:
# drop unnecessary columns (everything under the top-level key 'band')
imad_df = imad_df.drop(['band'], axis=1)

# Flat columns - remove multidimensional index in columns.
# Columns created by reset_index() (the pixel coordinates) have an empty second
# level; fall back to the first level for them so they keep their names
# instead of all collapsing into columns named ''.
imad_df.columns = [(col[1] or col[0]) if isinstance(col, tuple) else col for col in imad_df.columns]

# keep rows without any 0 and without any NaN; chaining returns a new frame
# instead of modifying a filtered slice in place
imad_df = imad_df[(imad_df != 0).all(axis=1)].dropna()

imad_df

In [None]:
# prepare PIF no change pixels for computing metrics
# drop unnecessary columns (everything under the top-level key 'band')
pif_df = pif_df.drop(['band'], axis=1)

# Flat columns - remove multidimensional index in columns.
# Columns created by reset_index() (the pixel coordinates) have an empty second
# level; fall back to the first level for them so they keep their names
# instead of all collapsing into columns named ''.
pif_df.columns = [(col[1] or col[0]) if isinstance(col, tuple) else col for col in pif_df.columns]

# keep rows without any 0 and without any NaN; chaining returns a new frame
# instead of modifying a filtered slice in place
pif_df = pif_df[(pif_df != 0).all(axis=1)].dropna()

pif_df

In [None]:
def compute_metrics(y_true, y_pred, method):
    '''
    Error statistics of y_pred against y_true.

    The residuals are y_pred - y_true. The returned dict holds, in this order,
    rmse_<method> (root of the mean squared residual), mae_<method> (mean
    absolute residual), stddev_<method> (standard deviation of the residuals
    with ddof=1) and mbe_<method> (mean residual).
    '''
    residuals = y_pred - y_true
    stats = (
        ('rmse', np.sqrt(np.mean(residuals**2))),
        ('mae', np.mean(np.abs(residuals))),
        ('stddev', np.std(residuals, ddof=1)),
        ('mbe', np.mean(residuals)),
    )
    return {f'{label}_{method}': value for label, value in stats}

def summarize_all_metrics(df, method='BI'):
    '''
    Compute the error metrics of every band against Landsat 9.

    df must have flat columns named l9<band> plus, depending on method,
    l5<band>, p<band> and/or i<band> for the bands b, g, r, n, s1, s2.

    method selects the comparisons:
    - 'before': Landsat 5 vs Landsat 9
    - 'PIF':    PIF vs Landsat 9
    - 'IRMAD':  IRMAD vs Landsat 9
    - 'BP':     'before' and 'PIF'
    - 'BI':     'before' and 'IRMAD'

    Output: DataFrame indexed by 'band' with the columns produced by
    compute_metrics. The combined modes add one row per band and comparison,
    so the columns of the other comparison are NaN in each row.
    Raises ValueError for any other method.
    '''
    if method not in {'before', 'PIF', 'IRMAD', 'BI', 'BP'}:
        raise ValueError(f'Unknown method: {method}')

    # (column prefix, metric suffix, modes that request the comparison),
    # listed in the order in which rows are emitted for each band
    comparisons = (
        ('l5', 'before', ('before', 'BI', 'BP')),
        ('p', 'PIF', ('PIF', 'BP')),
        ('i', 'IRMAD', ('IRMAD', 'BI')),
    )

    records = []
    for band in ('b', 'g', 'r', 'n', 's1', 's2'):
        reference = df[f'l9{band}'].to_numpy()
        for prefix, suffix, modes in comparisons:
            if method not in modes:
                continue
            candidate = df[f'{prefix}{band}'].to_numpy()
            record = compute_metrics(reference, candidate, suffix)
            record.update(band=band)
            records.append(record)

    return pd.DataFrame(records).set_index(['band'])

In [None]:
# compute metrics for IMAD (IRMAD-normalized bands vs Landsat 9)
#imad_metrics = summarize_all_metrics(imad_df_inv, method='IRMAD')
imad_metrics = summarize_all_metrics(imad_df, method='IRMAD')

#pif_metrics = summarize_all_metrics(pif_df, method='PIF')
#pif_metrics

# metrics before normalization (Landsat 5 vs Landsat 9) on the same rows
#before_metrics_imad = summarize_all_metrics(imad_df_inv, method='before')
before_metrics_imad = summarize_all_metrics(imad_df, method='before')
before_metrics_imad

In [None]:
# metrics before normalization (Landsat 5 vs Landsat 9) on the PIF rows
before_metrics_pif = summarize_all_metrics(pif_df, method='before')
print(before_metrics_pif)
# rename columns by appending '_pif' to each metric name
before_metrics_pif.rename(columns = {'rmse_before': 'rmse_before_pif', 'mae_before': 'mae_before_pif', 'stddev_before': 'stddev_before_pif', 'mbe_before': 'mbe_before_pif'}, inplace = True)
print(before_metrics_pif)

In [None]:
# why do the metrics for before come out differently for the PIF dataset and differently for the IMAD dataset?
# because the PIF dataset is different than the IMAD dataset, different data, different locations, different time, different atmosphere, different geometry, etc.

In [None]:
# display the before-normalization metrics computed on the IMAD rows
before_metrics_imad
#before_metrics_pif

In [None]:
# display the IRMAD metrics
imad_metrics
#pif_metrics

In [None]:
# cat all metrics together: before and IRMAD metrics side by side (columns), aligned on the band index
all_metrics = pd.concat([before_metrics_imad, imad_metrics], axis=1) #, before_metrics_pif, pif_metrics
#all_metrics = pd.concat([before_metrics_pif, pif_metrics], axis=1) #, before_metrics_pif, pif_metrics
# NOTE(review): not the last statement of the cell, so nothing is displayed for it
all_metrics
# save to excel file (written to the current working directory)
all_metrics.to_excel('norm_metrics_15_posledni.xlsx', index=True, sheet_name='metrics')

### SPECTRAL REFLECTANCE CURVES

In [None]:
# sort landcover types in ascending order
gdf_vec_imad = gdf_vec_imad.sort_values(by='type')
# bands drawn in every panel
# NOTE(review): despite the variable name and the figure title these are the
# IRMAD columns; alternatives: ['l5b', 'l5g', 'l5r', 'l5n', 'l5s1'] - confirm which set is intended
l9_spect_bands = ['ib', 'ig', 'ir', 'in', 'is1', 'is2']
land_names = ['1) Builtup', '2) Water', '3) Forest', '4) Cropland', '5) Grassland']

# get unique landcover types; points without a type (NaN) get no panel
landcover_types = gdf_vec_imad['type'].dropna().unique()

# rows and columns for subplots: at least the original 2 x 3 grid, more rows
# only when there are more landcover types than panels
n_cols = 3
n_rows = max(2, -(-len(landcover_types) // n_cols))

fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows), squeeze=False)

# axes.flat walks the grid row by row, i.e. panel k sits at (k // n_cols, k % n_cols).
# The loop variable is deliberately not called `idx`, which is the name other
# cells use for pd.IndexSlice.
for panel_no, landcover_type in enumerate(landcover_types):
    ax = axes.flat[panel_no]

    subset = gdf_vec_imad[gdf_vec_imad['type'] == landcover_type]

    # plot each spectral curve separately (one line = one point)
    for _, row_data in subset.iterrows():
        ax.plot(l9_spect_bands, row_data[l9_spect_bands], alpha=0.2, color='gray')

    # mean and std of each band
    mean_spect = subset[l9_spect_bands].mean()
    std_spectrum = subset[l9_spect_bands].std()
    ax.plot(l9_spect_bands, mean_spect, color='red', linewidth=2, label='mean')
    ax.plot(l9_spect_bands, mean_spect - 2*std_spectrum, '--', color='darkred', linewidth=2.5, label='± 2 std')
    ax.plot(l9_spect_bands, mean_spect + 2*std_spectrum, '--', color='darkred', linewidth=2.5)

    # names are assigned by panel position, as before; types beyond the
    # known names get a generic title instead of raising IndexError
    if panel_no < len(land_names):
        panel_title = land_names[panel_no]
    else:
        panel_title = f'type {landcover_type}'
    ax.set_title(f'{panel_title}')
    ax.set_xlabel('Band')
    ax.set_ylabel('SR')
    ax.grid(True)
    ax.legend()

# remove every panel that received no landcover type
for spare_ax in axes.flat[len(landcover_types):]:
    fig.delaxes(spare_ax)
plt.suptitle('Landsat 9 Spectral Curves', fontsize=16, y=1.02)
plt.tight_layout()
plt.show()

In [None]:
# IRMAD ONLY
# select the type 1 rows and, among them, the row(s) with the minimum l5b value
type1 = gdf_vec_imad[gdf_vec_imad['type'] == 1]
# NOTE(review): del1 is not used below; it only holds the row(s) that get removed
del1 = type1[type1['l5b'] ==type1['l5b'].min()]
# alternative kept for reference: delete the max value for type 1 from l5b
# type1 = type1.drop(type1[type1['l5b'] == type1['l5b'].max()].index)
# delete the type 1 row(s) with the minimum l5b value from gdf_vec_imad
gdf_vec_imad = gdf_vec_imad.drop(type1[type1['l5b'] ==type1['l5b'].min()].index)

In [None]:
# PIF ONLY
# select the type 1 rows of the PIF points and, among them, the row(s) with the minimum l5b value
type1 = gdf_vector[gdf_vector['type'] == 1]
# NOTE(review): del1 is not used below; it only holds the row(s) that get removed
del1 = type1[type1['l5b'] ==type1['l5b'].min()]
# alternative kept for reference: delete the max value for type 1 from l5b
# type1 = type1.drop(type1[type1['l5b'] == type1['l5b'].max()].index)
# NOTE: the result is stored in gdf_vec_imad, i.e. after this cell gdf_vec_imad
# holds the PIF points (gdf_vector without the removed rows), not the IRMAD points
gdf_vec_imad = gdf_vector.drop(type1[type1['l5b'] ==type1['l5b'].min()].index)

In [None]:
# create graphs with reflectance curves
# list the landcover type values present in gdf_vec_imad
print(gdf_vec_imad['type'].unique())

def get_mean_values(subset):
    '''
    Mean value of every band for Landsat 9, Landsat 5 and the normalized data.

    subset: DataFrame with the columns l9*, l5* and either the PIF columns
            (pb, pg, pr, pn, ps1, ps2) or the IRMAD columns (ib, ig, ir, in, is1, is2).

    The normalized column set is detected from the columns: the PIF columns are
    used when all of them are present, otherwise the IRMAD columns. The source
    no longer has to be edited when switching between the PIF and IRMAD runs.

    Returns a tuple of three arrays (Landsat 9, Landsat 5, normalized), each in
    the band order blue, green, red, nir, swir1, swir2.
    Raises KeyError when a required column is missing.
    '''
    l5 = ['l5b', 'l5g', 'l5r', 'l5n', 'l5s1', 'l5s2']
    l9 = ['l9b', 'l9g', 'l9r', 'l9n', 'l9s1', 'l9s2']
    pif = ['pb', 'pg', 'pr', 'pn', 'ps1', 'ps2']
    irmad = ['ib', 'ig', 'ir', 'in', 'is1', 'is2']
    # PIF columns win when complete, which keeps the previous behaviour for PIF data
    normalized = pif if all(name in subset.columns for name in pif) else irmad
    mean_landsat9 = subset[l9].mean().values
    mean_landsat5 = subset[l5].mean().values
    mean_irmad = subset[normalized].mean().values
    return (mean_landsat9, mean_landsat5, mean_irmad)

def get_median_values(subset):
    '''
    Median value of every band for Landsat 9, Landsat 5 and the normalized data.

    subset: DataFrame with the columns l9*, l5* and either the PIF columns
            (pb, pg, pr, pn, ps1, ps2) or the IRMAD columns (ib, ig, ir, in, is1, is2).

    The normalized column set is detected from the columns: the PIF columns are
    used when all of them are present, otherwise the IRMAD columns. The source
    no longer has to be edited when switching between the PIF and IRMAD runs.

    Returns a tuple of three arrays (Landsat 9, Landsat 5, normalized), each in
    the band order blue, green, red, nir, swir1, swir2.
    Raises KeyError when a required column is missing.
    '''
    l5 = ['l5b', 'l5g', 'l5r', 'l5n', 'l5s1', 'l5s2']
    l9 = ['l9b', 'l9g', 'l9r', 'l9n', 'l9s1', 'l9s2']
    pif = ['pb', 'pg', 'pr', 'pn', 'ps1', 'ps2']
    irmad = ['ib', 'ig', 'ir', 'in', 'is1', 'is2']
    # PIF columns win when complete, which keeps the previous behaviour for PIF data
    normalized = pif if all(name in subset.columns for name in pif) else irmad
    median_landsat9 = subset[l9].median().values
    median_landsat5 = subset[l5].median().values
    median_irmad = subset[normalized].median().values
    # (the unused merged DataFrame of the three medians was removed)
    return (median_landsat9, median_landsat5, median_irmad)

# create subsets for each landcover type (type values 1 to 6)
gdf_vector1 = gdf_vec_imad[gdf_vec_imad['type'] == 1] 
gdf_vector2 = gdf_vec_imad[gdf_vec_imad['type'] == 2]
gdf_vector3 = gdf_vec_imad[gdf_vec_imad['type'] == 3]
gdf_vector4 = gdf_vec_imad[gdf_vec_imad['type'] == 4]
gdf_vector5 = gdf_vec_imad[gdf_vec_imad['type'] == 5]
gdf_vector6 = gdf_vec_imad[gdf_vec_imad['type'] == 6]

# per-type mean of every band: (Landsat 9, Landsat 5, normalized)
l9mean_vector1, l5mean_vector1, imadmean_vector1 = get_mean_values(gdf_vector1)
l9mean_vector2, l5mean_vector2, imadmean_vector2 = get_mean_values(gdf_vector2)
l9mean_vector3, l5mean_vector3, imadmean_vector3 = get_mean_values(gdf_vector3)
l9mean_vector4, l5mean_vector4, imadmean_vector4 = get_mean_values(gdf_vector4)
l9mean_vector5, l5mean_vector5, imadmean_vector5 = get_mean_values(gdf_vector5)
l9mean_vector6, l5mean_vector6, imadmean_vector6 = get_mean_values(gdf_vector6)

# per-type median of every band: (Landsat 9, Landsat 5, normalized)
l9median_vector1, l5median_vector1, imadmedian_vector1 = get_median_values(gdf_vector1)
l9median_vector2, l5median_vector2, imadmedian_vector2 = get_median_values(gdf_vector2)
l9median_vector3, l5median_vector3, imadmedian_vector3 = get_median_values(gdf_vector3)
l9median_vector4, l5median_vector4, imadmedian_vector4 = get_median_values(gdf_vector4)
l9median_vector5, l5median_vector5, imadmedian_vector5 = get_median_values(gdf_vector5)
l9median_vector6, l5median_vector6, imadmedian_vector6 = get_median_values(gdf_vector6)

In [None]:
# list of categories for plotting
type_categories = [1, 2, 3, 4, 5, 6] # , 4, 5, 6
# names in type order: type_name[t - 1] belongs to type t
type_name = ['built-up', 'water', 'forests - coniferous', 'croplands', 'grasslands',  'forests - deciduous']
wavelength = np.array([0.482, 0.561, 0.654, 0.864, 1.609, 2.201]) # Wavelengths for Landsat bands: Blue, Green, Red, NIR, SWIR1, SWIR2
gdf_vectors = [gdf_vec_imad[gdf_vec_imad['type'] == t] for t in type_categories]

# one row of axes per category; squeeze=False + ravel() gives a 1-D array of
# axes for any number of categories
fig, axes = plt.subplots(nrows=len(type_categories), ncols=1, figsize=(16, 10), sharex=True, squeeze=False)
axes = axes.ravel()

# for each type category plot mean spectral curves
# (the loop variable is not called `gdf`: that name holds the vector data
# loaded from the shapefile and must not be overwritten by a subset)
for ax, type_subset, type_value in zip(axes, gdf_vectors, type_categories):
    # get mean values for each type
    l9mean, l5mean, imadmean = get_mean_values(type_subset)

    # plot mean spectral curves
    ax.plot(wavelength, l9mean,  linestyle='-', color='blue', label='Landsat 9') # marker='o',
    ax.plot(wavelength, l5mean,  linestyle='-', color='green', label='Landsat 5') # marker='s',
    ax.plot(wavelength, imadmean,  linestyle='-', color='red', label='EMD_10') # marker='d',

    # titles
    ax.set_title(f'Mean Spectral Curves for TYPE = {type_name[type_value-1]}', fontsize=12)
    ax.grid(True)
    ax.set_xlim(wavelength[0], wavelength[-1])  # 0.482 – 2.201
    ax.margins(x=0)

# shared axis labels for the whole figure
fig.text(0.5, 0.04, 'Wavelength (µm)', ha='center', fontsize=9)
fig.text(0.04, 0.5, 'Reflectance (%)', va='center', rotation='vertical', fontsize=9)
axes[0].legend(loc='upper right')
#plt.tight_layout()
plt.show()


In [None]:
# GRAPHS OF MEAN/MEDIAN SPECTRAL CURVES
# Single panel for landcover type 1 (built-up); the median curves are drawn,
# the mean curves are kept below as a commented-out alternative.
wavelength = np.array([0.482, 0.561, 0.654, 0.864, 1.609, 2.201])
plt.figure(figsize=(8, 2.5))

# mean spectral curves
# plt.plot(wavelength, l9mean_vector1, marker='o', linestyle='-', color='blue', label="Landsat 9")
# plt.plot(wavelength, l5mean_vector1, marker='s', linestyle='-', color='green', label="Landsat 5")
# plt.plot(wavelength, imadmean_vector1, marker='d', linestyle='-', color='red', label="IRMAD")

# median spectral curves
plt.plot(wavelength, l9median_vector1, marker='o', linestyle='-', color='blue', label='Landsat 9')
plt.plot(wavelength, l5median_vector1, marker='s', linestyle='-', color='green', label='Landsat 5')
plt.plot(wavelength, imadmedian_vector1, marker='d', linestyle='-', color='red', label='IRMAD 64 it.')

# titles
plt.xlabel('Wavelength (µm)', fontsize=9)
plt.ylabel('Reflectance', fontsize=9)
#plt.xlim(wavelength[0], wavelength[-1]) 
#plt.title('Mean Spectral Curves built-up, water, forests - coniferous, croplands, grasslands, forests - deciduous', fontsize=10))
# the plotted series are medians, so the title says so (it previously read "Mean");
# switch it back together with the plot calls when using the mean curves
plt.title('Median spectral curves – built-up', fontsize=10)
plt.legend()
plt.grid(True)

plt.show()