In [None]:
import itertools                    # For creating combinations of parameters

import numpy as np                  # For numerical operations
import pandas as pd                 # For data manipulation
import matplotlib.pyplot as plt     # For plotting

from IPython.display import display # For displaying DataFrames in Jupyter

In [None]:
# --- Data import ---
# Read the exported table (May export) into a DataFrame.
data = pd.read_csv('path/to/your/data.csv', sep=',')

# --- Data exploration ---
# Report how many distinct polygon IDs the table contains.
print('Number of areas/polygons:', len(data['ID'].unique()))

# Build a datetime `date` column from the leading 9 characters of `system:index`.
# NOTE(review): a compact YYYYMMDD date is only 8 characters long — confirm that
# a 9-character prefix is what this export's index format actually requires.
data = data.assign(date=pd.to_datetime(data['system:index'].str.slice(stop=9)))

In [None]:
# Keep only the rows whose NDSI value is strictly below 0.4
# (rows with a missing NDSI compare as False and are dropped as well).
filtered_data = data.loc[data['NDSI'].lt(0.4)]

In [None]:
# Distinct polygon IDs and acquisition dates that survived the NDSI filter.
unique_ids, unique_dates = (filtered_data[column].unique() for column in ('ID', 'date'))

# Columns that take part in the correlation analysis.
features_all = [
    'NDVI',
    'EVI',
    'FAPAR',
    'LAI',
    'NDMI',
    'MSAVI',
    'NDRE',
    'WNDII',
    'TCW',
]

In [None]:
# Pairwise Pearson correlation between every pair of selected feature columns.
feature_table = filtered_data.loc[:, features_all]
corr_all = feature_table.corr(method='pearson')

In [None]:
# Draw the correlation matrix as an annotated heatmap.
n_features = len(features_all)
tick_positions = np.arange(n_features)
corr_values = corr_all.to_numpy()

fig, ax = plt.subplots(figsize=(9, 9))
cax = ax.imshow(corr_all, interpolation='none', aspect='auto', vmin=-1, vmax=1)
fig.colorbar(cax, label='Pearson r')

# One tick per feature on both axes; the x labels are rotated.
ax.set_xticks(tick_positions)
ax.set_xticklabels(features_all, rotation=45, ha='right')
ax.set_yticks(tick_positions)
ax.set_yticklabels(features_all)
ax.set_title('Pearson Correlation Matrix')

# Write the two-decimal coefficient in the centre of every cell.
for row, col in itertools.product(range(n_features), repeat=2):
    ax.text(col, row, format(corr_values[row, col], '.2f'), ha='center', va='center')

plt.tight_layout()
plt.show()

In [None]:
# Absolute correlations with the diagonal (self-correlation) set to zero.
# The diagonal is cleared on an explicit NumPy copy rather than through
# `DataFrame.values`: that array may be a copy (the write would be lost) and is
# read-only when pandas Copy-on-Write is active (the write would raise).
abs_values = corr_all.abs().to_numpy(copy=True)
np.fill_diagonal(abs_values, 0.0)
corr_abs = pd.DataFrame(abs_values, index=corr_all.index, columns=corr_all.columns)

# Average |r| of each feature against the *other* features only.
# Diagonal cells are masked out (NaN) so they are excluded from the mean instead
# of being counted as zeros, which would understate every average by (n-1)/n.
off_diagonal = ~np.eye(len(corr_abs), dtype=bool)
mean_abs_corr = corr_abs.where(off_diagonal).mean(axis=1)

# Sort features by ascending mean correlation (less redundant first)
mean_abs_corr = mean_abs_corr.sort_values()
print("Average |r| to others (lowest = least redundant):")
print(mean_abs_corr)

# Keep the n_reps least redundant features, i.e. the first entries of the
# ascending-sorted mean |r| ranking.
n_reps = 6
representatives = mean_abs_corr.index[:n_reps].tolist()
print(f"\nSelected {n_reps} indices for clustering:", representatives)

# Enumerate every subset of the representatives that has at least two members,
# ordered by subset size.
combos = [
    combo
    for size in range(2, len(representatives) + 1)
    for combo in itertools.combinations(representatives, size)
]

print("\nProposed combinations for time-series clustering:")
for combo in combos:
    print(" ", combo)

In [None]:
# Keep each feature pair once: strict upper triangle of the correlation matrix.
mask = np.triu(np.ones(corr_all.shape, dtype=bool), k=1)

# Flatten to a (feature, feature) -> r Series and rank by strength.
# - dropna(): newer pandas `stack()` no longer drops the NaN cells produced by
#   the mask, so they are removed explicitly.
# - Sorting by |r| in descending order puts the strongest relationships first,
#   whether positive or negative; an ascending sort on the signed value would
#   list the most negative / weakest pairs first.
corr_pairs = (
    corr_all.where(mask)
    .stack()
    .dropna()
    .sort_values(key=lambda r: r.abs(), ascending=False)
)

# Print top N strongest correlated pairs (signed r is shown)
top_n = 20
print(f"Top {top_n} strongest correlated pairs:")
print(corr_pairs.head(top_n))

# Tabulate the proposed combinations: one row per combination, holding its
# member count and its members joined with ' & '.
df_combos = pd.DataFrame(
    [(len(combo), ' & '.join(combo)) for combo in combos],
    columns=['Size', 'Combination'],
)
display(df_combos)

In [None]:
# Feature pairs that must never appear together in a combination.
forbidden_pairs = [
    ('WNDII', 'NDMI'),
    ('EVI', 'MSAVI'),
    ('WNDII', 'TCW'),
    ('NDMI', 'TCW'),
]

# A combination survives only if it contains none of the forbidden pairs,
# i.e. for every pair at least one of its two members is absent.
filtered_combos = [
    combo
    for combo in combos
    if all(not set(combo).issuperset(pair) for pair in forbidden_pairs)
]

# Tabulate the combinations that passed the forbidden-pair filter: one row per
# combination, holding its member count and its members joined with ' & '.
df_filtered = pd.DataFrame({
    'Size': [len(c) for c in filtered_combos],
    'Combination': [' & '.join(c) for c in filtered_combos]
})
# Rendered with display() — like the unfiltered combinations table — so the
# notebook shows a rich table rather than plain printed text.
display(df_filtered)
