<a href="https://colab.research.google.com/github/melkatewabe10/Machine-learning_LST-Estimation-/blob/main/RF_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **RF_model**

In [None]:
!pip install rasterio
!pip install joblib
!pip install scikit-learn
!pip install scipy
!pip install fiona

Collecting rasterio
  Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m37.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.4.3
Collecting fiona
  Downloadin

# Seasonal model training

In [None]:
import os
import time
import joblib
import numpy as np
import pandas as pd
import rasterio
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from collections import OrderedDict

# ===================================
# Configuration
# ===================================
# Input rasters are read from `data_folder`; every artefact written by this
# cell (models and CSV tables) goes to `output_folder`.
data_folder = '/content/drive/MyDrive/NEW FOLDER/NEWTRANING'  # edit to match your Drive layout
output_folder = '/content/drive/MyDrive/NEW FOLDER/MODEL'

# Years 2017..2024 inclusive; widen the range to process more years.
years = [*range(2017, 2025)]
# Zero-padded season codes: '01', '02', '03', '04'.
seasons = ["{:02d}".format(code) for code in range(1, 5)]
# Raster file-name prefixes; this order is also the feature-column order.
predictor_names = ['NDVI', 'EVI', 'NDWI', 'LAI', 'ALB', 'ELV', 'SLP', 'DSR']

# Create the output directory up front (no error if it already exists).
os.makedirs(output_folder, exist_ok=True)

# ===================================
# Helper Function to Read Raster
# ===================================
def read_raster(raster_path):
    """Read band 1 of a raster as a float array with invalid pixels set to NaN.

    Pixels that the dataset flags as invalid (its declared nodata value or
    mask) are replaced by ``np.nan`` so that the ``np.isfinite`` filter applied
    to the stacked arrays drops them. Without this, a numeric nodata sentinel
    (for example -9999) would be treated as a real observation.

    Parameters
    ----------
    raster_path : str
        Path to a raster file readable by rasterio.

    Returns
    -------
    numpy.ndarray
        2-D floating-point array holding band 1.
    """
    with rasterio.open(raster_path) as src:
        band = src.read(1, masked=True)
    # NaN only exists for floating dtypes, so promote integer rasters first.
    if not np.issubdtype(band.dtype, np.floating):
        band = band.astype(np.float64)
    return band.filled(np.nan)

# ===================================
# Store All Results
# ===================================
results_list = []

# ===================================
# Main Loop over Years and Seasons
# ===================================
# One Random Forest is fitted per (year, season) label. A label is skipped,
# with a message, whenever an input raster is missing, the rasters do not share
# one grid shape, or fewer than 100 pixels are valid in every layer.
for year in years:
    for season in seasons:
        label = f"{year}_{season}"
        print(f"\nProcessing {label}...")

        # --------------------------
        # Step 1: Load Predictors
        # --------------------------
        predictors = OrderedDict()
        missing = False
        for var in predictor_names:
            path = os.path.join(data_folder, f"{var}_{label}.tif")
            if not os.path.exists(path):
                print(f"Missing file: {path}. Skipping {label}.")
                missing = True
                break
            predictors[var] = read_raster(path)
        if missing:
            continue

        # --------------------------
        # Step 2: Load LST
        # --------------------------
        lst_path = os.path.join(data_folder, f"LST_{label}.tif")
        if not os.path.exists(lst_path):
            print(f"Missing LST file: {lst_path}. Skipping {label}.")
            continue
        lst = read_raster(lst_path)

        # Every layer must be on the same grid: flattening rasters of different
        # shapes would pair up unrelated pixels (or fail outright on stacking).
        mismatched = [var for var in predictor_names if predictors[var].shape != lst.shape]
        if mismatched:
            print(f"Shape mismatch with LST {lst.shape} for {mismatched}. Skipping {label}.")
            continue

        # --------------------------
        # Step 3: Stack Predictors and Target
        # --------------------------
        # One row per pixel, one column per predictor (order of predictor_names).
        feature_stack = np.column_stack([predictors[var].ravel() for var in predictor_names])
        lst_flat = lst.ravel()

        # Keep only pixels that are finite in every predictor and in LST.
        valid_mask = np.isfinite(feature_stack).all(axis=1) & np.isfinite(lst_flat)
        X = feature_stack[valid_mask]
        y = lst_flat[valid_mask]

        if len(y) < 100:
            print(f"Too few valid pixels ({len(y)}). Skipping {label}.")
            continue

        # --------------------------
        # Step 4: Define Bagging-Based Random Forest
        # --------------------------
        model = RandomForestRegressor(
            n_estimators=200,
            max_features=3,
            min_samples_split=2,
            max_depth=None,
            bootstrap=True,
            oob_score=True,
            random_state=42,
            n_jobs=-1
        )

        # --------------------------
        # Step 5: Train Model
        # --------------------------
        print(f"Training Random Forest for {label}...")
        start = time.time()
        model.fit(X, y)
        elapsed = time.time() - start
        print(f"Training complete in {elapsed:.2f} seconds.")

        # --------------------------
        # Step 6: Evaluate Model
        # --------------------------
        # NOTE: predictions are made on the same pixels used for fitting, so
        # R2 / RMSE / MAE below are in-sample scores; OOB_Score is the only
        # out-of-sample estimate recorded here.
        y_pred = model.predict(X)
        mr2 = r2_score(y, y_pred)
        rmse = np.sqrt(mean_squared_error(y, y_pred))
        mae = mean_absolute_error(y, y_pred)
        oob = model.oob_score_

        # --------------------------
        # Step 7: Save Model
        # --------------------------
        model_file = os.path.join(output_folder, f"RF_{label}.pkl")
        joblib.dump(model, model_file)

        # --------------------------
        # Step 8: Save Feature Importances
        # --------------------------
        importance_df = pd.DataFrame({
            'Feature': predictor_names,
            'Importance': model.feature_importances_
        })
        importance_file = os.path.join(output_folder, f"Importance_{label}.csv")
        importance_df.to_csv(importance_file, index=False)

        # --------------------------
        # Step 9: Save R² for Each Predictor Using Linear Regression
        # --------------------------
        # Univariate fit: LST regressed on one predictor at a time.
        r2_data = []
        for i, var in enumerate(predictor_names):
            X_pred = X[:, i].reshape(-1, 1)  # LinearRegression expects 2-D input
            model_lr = LinearRegression()
            model_lr.fit(X_pred, y)
            r2 = model_lr.score(X_pred, y)
            r2_data.append({'Feature': var, 'R2': r2})

        r2_df = pd.DataFrame(r2_data)
        r2_file = os.path.join(output_folder, f"R2_{label}.csv")
        r2_df.to_csv(r2_file, index=False)

        # --------------------------
        # Step 10: Save Summary Metrics
        # --------------------------
        results_list.append({
            'Year': year,
            'Season': season,
            'OOB_Score': oob,
            'R2': mr2,
            'RMSE': rmse,
            'MAE': mae,
            'Train_Time_sec': elapsed
        })

# ===================================
# Final Summary CSV
# ===================================
# One row per successfully trained label.
summary_df = pd.DataFrame(results_list)
summary_file = os.path.join(output_folder, "Summary_Seasonal_Results.csv")
summary_df.to_csv(summary_file, index=False)

print("\nSeasonal model training completed. Summary saved to:")
print(summary_file)



Processing 2017_01...
Training Random Forest for 2017_01...
Training complete in 23.17 seconds.

Processing 2017_02...
Training Random Forest for 2017_02...
Training complete in 24.73 seconds.

Processing 2017_03...
Training Random Forest for 2017_03...
Training complete in 25.21 seconds.

Processing 2017_04...
Training Random Forest for 2017_04...
Training complete in 25.22 seconds.

Processing 2018_01...
Training Random Forest for 2018_01...
Training complete in 26.71 seconds.

Processing 2018_02...
Training Random Forest for 2018_02...
Training complete in 25.39 seconds.

Processing 2018_03...
Training Random Forest for 2018_03...
Training complete in 25.84 seconds.

Processing 2018_04...
Training Random Forest for 2018_04...
Training complete in 26.57 seconds.

Processing 2019_01...
Training Random Forest for 2019_01...
Training complete in 26.39 seconds.

Processing 2019_02...
Training Random Forest for 2019_02...
Training complete in 26.63 seconds.

Processing 2019_03...
Trainin

# Feature importance CSV file

In [None]:
import os
import pandas as pd

# ========== CONFIGURATION ==========
folder = '/content/drive/MyDrive/NEW FOLDER/MODEL'  # <- Update this path
output_folder= '/content/drive/MyDrive/NEW FOLDER/STAT'
output_csv = os.path.join(output_folder, 'Importanc_01.csv')

# ========== SCAN AND FILTER FILES ==========
# Per-label importance tables are named "Importance_<year>_<season>.csv".
# os.listdir returns entries in arbitrary order, so sort them to get a
# reproducible row order in the combined file.
all_files = sorted(
    f for f in os.listdir(folder)
    if f.startswith("Importance_") and f.endswith(".csv")
)

# ========== PROCESS AND COMBINE ==========
df_list = []

for file in all_files:
    try:
        # "Importance_2017_01.csv" -> ["Importance", "2017", "01"]
        parts = os.path.splitext(file)[0].split('_')
        year = int(parts[1])
        season_code = parts[2].zfill(2)  # Make sure it's '01', '02', etc.

        # Read the file
        file_path = os.path.join(folder, file)
        df = pd.read_csv(file_path)

        # Tag every row with the label the table came from.
        df['Year'] = year
        df['Season_Code'] = season_code

        df_list.append(df)

    except Exception as e:
        # Best effort: report the problem file and carry on with the rest.
        print(f"Error processing file {file}: {e}")

# ========== FINAL CONCATENATION ==========
if df_list:
    # to_csv does not create missing directories, so make sure it exists.
    os.makedirs(output_folder, exist_ok=True)
    combined_df = pd.concat(df_list, ignore_index=True)
    combined_df.to_csv(output_csv, index=False)
    print(f"✅ Combined seasonal importance saved to:\n{output_csv}")
else:
    print("No importance files were found or processed.")


✅ Combined seasonal importance saved to:
/content/drive/MyDrive/SEASON_STAT/Importanc_03.csv


# R² CSV data

In [None]:
import os
import pandas as pd

# ========== CONFIGURATION ==========
folder = '/content/drive/MyDrive/SEASON_DATA_MODEL'  # <- Update this path
output_folder= '/content/drive/MyDrive/SEASON_STAT'
output_csv = os.path.join(output_folder, 'Rsquare_03.csv')

# ========== SCAN AND FILTER FILES ==========
# Per-label R2 tables are named "R2_<year>_<season>.csv".
# os.listdir returns entries in arbitrary order, so sort them to get a
# reproducible row order in the combined file.
all_files = sorted(
    f for f in os.listdir(folder)
    if f.startswith("R2_") and f.endswith(".csv")
)

# ========== PROCESS AND COMBINE ==========
df_list = []

for file in all_files:
    try:
        # "R2_2017_01.csv" -> ["R2", "2017", "01"]
        parts = os.path.splitext(file)[0].split('_')
        year = int(parts[1])
        season_code = parts[2].zfill(2)  # Make sure it's '01', '02', etc.

        # Read the file
        file_path = os.path.join(folder, file)
        df = pd.read_csv(file_path)

        # Tag every row with the label the table came from.
        df['Year'] = year
        df['Season_Code'] = season_code

        df_list.append(df)

    except Exception as e:
        # Best effort: report the problem file and carry on with the rest.
        print(f"Error processing file {file}: {e}")

# ========== FINAL CONCATENATION ==========
if df_list:
    # to_csv does not create missing directories, so make sure it exists.
    os.makedirs(output_folder, exist_ok=True)
    combined_df = pd.concat(df_list, ignore_index=True)
    combined_df.to_csv(output_csv, index=False)
    # This cell combines the R2 tables, so say so in the message.
    print(f"Combined seasonal R2 saved to:\n{output_csv}")
else:
    print("No R2 files were found or processed.")

Combined seasonal importance saved to:
/content/drive/MyDrive/SEASON_STAT/Rsquare_03.csv


# Monthly model training

In [None]:
import os
import time
import joblib
import numpy as np
import pandas as pd
import rasterio
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr
from collections import OrderedDict

# ===================================
# Configuration
# ===================================
# Input rasters are read from `data_folder`; every artefact written by this
# cell (models and CSV tables) goes to `output_folder`.
data_folder = '/content/drive/MyDrive/MMASK_FIVE'  # edit to match your Drive layout
output_folder = '/content/drive/MyDrive/MMASK_FIVE_DD3'

# Half-open range: only 2021 is processed; widen it to add years.
years = [*range(2021, 2022)]
# Zero-padded month codes: '01' ... '12'.
months = ["{:02d}".format(code) for code in range(1, 13)]
# Raster file-name prefixes; this order is also the feature-column order.
predictor_names = ['NDVI', 'EVI', 'NDWI', 'LAI', 'ALB', 'ELV', 'SLP', 'DSR']

# Create the output directory up front (no error if it already exists).
os.makedirs(output_folder, exist_ok=True)

# ===================================
# Helper Function to Read Raster
# ===================================
def read_raster(raster_path):
    """Read band 1 of a raster as a float array with invalid pixels set to NaN.

    Pixels that the dataset flags as invalid (its declared nodata value or
    mask) are replaced by ``np.nan`` so that the ``np.isfinite`` filter applied
    to the stacked arrays drops them. Without this, a numeric nodata sentinel
    (for example -9999) would be treated as a real observation.

    Parameters
    ----------
    raster_path : str
        Path to a raster file readable by rasterio.

    Returns
    -------
    numpy.ndarray
        2-D floating-point array holding band 1.
    """
    with rasterio.open(raster_path) as src:
        band = src.read(1, masked=True)
    # NaN only exists for floating dtypes, so promote integer rasters first.
    if not np.issubdtype(band.dtype, np.floating):
        band = band.astype(np.float64)
    return band.filled(np.nan)

# ===================================
# Store All Results
# ===================================
results_list = []

# ===================================
# Main Loop over Years and Months
# ===================================
# One Random Forest is fitted per (year, month) label. A label is skipped,
# with a message, whenever an input raster is missing, the rasters do not share
# one grid shape, or fewer than 100 pixels are valid in every layer.
for year in years:
    for month in months:
        label = f"{year}_{month}"
        print(f"\nProcessing {label}...")

        # --------------------------
        # Step 1: Load Predictors
        # --------------------------
        predictors = OrderedDict()
        missing = False
        for var in predictor_names:
            path = os.path.join(data_folder, f"{var}_{label}.tif")
            if not os.path.exists(path):
                print(f"Missing file: {path}. Skipping {label}.")
                missing = True
                break
            predictors[var] = read_raster(path)
        if missing:
            continue

        # --------------------------
        # Step 2: Load LST
        # --------------------------
        lst_path = os.path.join(data_folder, f"LST_{label}.tif")
        if not os.path.exists(lst_path):
            print(f"Missing LST file: {lst_path}. Skipping {label}.")
            continue
        lst = read_raster(lst_path)

        # Every layer must be on the same grid: flattening rasters of different
        # shapes would pair up unrelated pixels (or fail outright on stacking).
        mismatched = [var for var in predictor_names if predictors[var].shape != lst.shape]
        if mismatched:
            print(f"Shape mismatch with LST {lst.shape} for {mismatched}. Skipping {label}.")
            continue

        # --------------------------
        # Step 3: Stack Predictors and Target
        # --------------------------
        # One row per pixel, one column per predictor (order of predictor_names).
        feature_stack = np.column_stack([predictors[var].ravel() for var in predictor_names])
        lst_flat = lst.ravel()

        # Keep only pixels that are finite in every predictor and in LST.
        valid_mask = np.isfinite(feature_stack).all(axis=1) & np.isfinite(lst_flat)
        X = feature_stack[valid_mask]
        y = lst_flat[valid_mask]

        if len(y) < 100:
            print(f"Too few valid pixels ({len(y)}). Skipping {label}.")
            continue

        # --------------------------
        # Step 4: Define Bagging-Based Random Forest
        # --------------------------
        model = RandomForestRegressor(
            n_estimators=200,
            max_features=3,
            min_samples_split=2,
            max_depth=None,
            bootstrap=True,
            oob_score=True,
            random_state=42,
            n_jobs=-1
        )

        # --------------------------
        # Step 5: Train Model
        # --------------------------
        print(f"Training Random Forest for {label}...")
        start = time.time()
        model.fit(X, y)
        elapsed = time.time() - start
        print(f"Training complete in {elapsed:.2f} seconds.")

        # --------------------------
        # Step 6: Evaluate Model
        # --------------------------
        # NOTE: predictions are made on the same pixels used for fitting, so
        # R2 / RMSE / MAE below are in-sample scores; OOB_Score is the only
        # out-of-sample estimate recorded here.
        y_pred = model.predict(X)
        mr2 = r2_score(y, y_pred)
        rmse = np.sqrt(mean_squared_error(y, y_pred))
        mae = mean_absolute_error(y, y_pred)
        oob = model.oob_score_

        # --------------------------
        # Step 7: Save Model
        # --------------------------
        model_file = os.path.join(output_folder, f"RF_{label}.pkl")
        joblib.dump(model, model_file)

        # --------------------------
        # Step 8: Save Feature Importances
        # --------------------------
        importance_df = pd.DataFrame({
            'Feature': predictor_names,
            'Importance': model.feature_importances_
        })
        importance_file = os.path.join(output_folder, f"Importance_{label}.csv")
        importance_df.to_csv(importance_file, index=False)

        # --------------------------
        # Step 9: Save R² for Each Predictor Using Linear Regression
        # --------------------------
        r2_data = []
        for i, var in enumerate(predictor_names):
            # Reshape the predictor to be 2D as required by LinearRegression
            X_pred = X[:, i].reshape(-1, 1)

            # Fit a simple linear regression model between the predictor and the target
            model_lr = LinearRegression()
            model_lr.fit(X_pred, y)

            # Calculate R² for this predictor
            r2 = model_lr.score(X_pred, y)
            r2_data.append({'Feature': var, 'R2': r2})

        r2_df = pd.DataFrame(r2_data)
        r2_file = os.path.join(output_folder, f"R2_{label}.csv")
        r2_df.to_csv(r2_file, index=False)

        # --------------------------
        # Step 10: Save Summary Metrics
        # --------------------------
        results_list.append({
            'Year': year,
            'Month': month,
            'OOB_Score': oob,
            'R2': mr2,
            'RMSE': rmse,
            'MAE': mae,
            'Train_Time_sec': elapsed
        })

# ===================================
# Final Summary CSV
# ===================================
# One row per successfully trained label.
summary_df = pd.DataFrame(results_list)
summary_file = os.path.join(output_folder, "Summary_Monthly_Results.csv")
summary_df.to_csv(summary_file, index=False)

print("\nMonthly model training completed. Summary saved to:")
print(summary_file)


# Monthly model training based on data exploration

In [None]:
!pip install rasterio
!pip install joblib
!pip install scikit-learn
!pip install scipy
!pip install fiona

In [None]:
#  Import Necessary Libraries
# =============================================================================
import os
import time
import pandas as pd
import numpy as np
from scipy import stats
import rasterio
import fiona
import joblib
import matplotlib.pyplot as plt
from rasterio.plot import show
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import pearsonr
from rasterio.mask import mask
# Additional libraries for visualization and file operations
from collections import OrderedDict

# ===================================
# Configuration
# ===================================
# Input rasters are read from `data_folder`; every artefact written by this
# cell (models and CSV tables) goes to `output_folder`.
data_folder = '/content/drive/MyDrive/MONTHLY_THREE'  # edit to match your Drive layout
output_folder = '/content/drive/MyDrive/MMASK_THREE_R22'

# Half-open range: only 2011 is processed; widen it to add years.
years = [*range(2011, 2012)]
# Zero-padded month codes: '01' ... '12'.
months = ["{:02d}".format(code) for code in range(1, 13)]
# Raster file-name prefixes; this order is also the feature-column order.
predictor_names = ['NDVI', 'EVI', 'NDWI', 'LAI', 'ALB', 'ELV', 'SLP', 'DSR']

# Create the output directory up front (no error if it already exists).
os.makedirs(output_folder, exist_ok=True)

# ===================================
# Helper Function to Read Raster
# ===================================
def read_raster(raster_path):
    """Read band 1 of a raster as a float array with invalid pixels set to NaN.

    Pixels that the dataset flags as invalid (its declared nodata value or
    mask) are replaced by ``np.nan`` so that the ``np.isfinite`` filters applied
    to the arrays drop them. Without this, a numeric nodata sentinel (for
    example -9999) would be treated as a real observation.

    Parameters
    ----------
    raster_path : str
        Path to a raster file readable by rasterio.

    Returns
    -------
    numpy.ndarray
        2-D floating-point array holding band 1.
    """
    with rasterio.open(raster_path) as src:
        band = src.read(1, masked=True)
    # NaN only exists for floating dtypes, so promote integer rasters first.
    if not np.issubdtype(band.dtype, np.floating):
        band = band.astype(np.float64)
    return band.filled(np.nan)

# ===================================
# Store All Results
# ===================================
results_list = []

# ===================================
# Main Loop over Years and Months
# ===================================
# One Random Forest is fitted per (year, month) label. A label is skipped,
# with a message, whenever an input raster is missing, the rasters do not share
# one grid shape, or fewer than 100 pixels are valid in every layer.
for year in years:
    for month in months:
        label = f"{year}_{month}"
        print(f"\nProcessing {label}...")

        # --------------------------
        # Step 1: Load Predictors
        # --------------------------
        predictors = OrderedDict()
        missing = False
        for var in predictor_names:
            path = os.path.join(data_folder, f"{var}_{label}.tif")
            if not os.path.exists(path):
                print(f"Missing file: {path}. Skipping {label}.")
                missing = True
                break
            predictors[var] = read_raster(path)
        if missing:
            continue

        # --------------------------
        # Optional: Inspect Predictor Data
        # --------------------------
        # Prints min / max / mean over the finite pixels of each predictor.
        print(f"Predictor statistics for {label}:")
        for var_name, data in predictors.items():
            valid_data = data[np.isfinite(data)]
            if valid_data.size == 0:
                # Only the statistics line is skipped; the label is still processed.
                print(f"{var_name}: All values are NaN. Skipping visualization.")
                continue
            print(f"{var_name}: min={np.min(valid_data):.2f}, max={np.max(valid_data):.2f}, mean={np.mean(valid_data):.2f}")

            # # Optional: Plot thumbnails
            # plt.figure(figsize=(3, 3))
            # plt.imshow(data, cmap='viridis')
            # plt.colorbar(label=var_name)
            # plt.title(f"{var_name} - {label}")
            # plt.axis('off')
            # plt.tight_layout()
            # plt.show()

        # --------------------------
        # Step 2: Load LST
        # --------------------------
        lst_path = os.path.join(data_folder, f"LST_{label}.tif")
        if not os.path.exists(lst_path):
            print(f"Missing LST file: {lst_path}. Skipping {label}.")
            continue
        lst = read_raster(lst_path)

        # Every layer must be on the same grid: flattening rasters of different
        # shapes would pair up unrelated pixels (or fail outright on stacking).
        mismatched = [var for var in predictor_names if predictors[var].shape != lst.shape]
        if mismatched:
            print(f"Shape mismatch with LST {lst.shape} for {mismatched}. Skipping {label}.")
            continue

        # --------------------------
        # Step 3: Stack Predictors and Target
        # --------------------------
        # One row per pixel, one column per predictor (order of predictor_names).
        feature_stack = np.column_stack([predictors[var].ravel() for var in predictor_names])
        lst_flat = lst.ravel()

        # Keep only pixels that are finite in every predictor and in LST.
        valid_mask = np.isfinite(feature_stack).all(axis=1) & np.isfinite(lst_flat)
        X = feature_stack[valid_mask]
        y = lst_flat[valid_mask]

        if len(y) < 100:
            print(f"Too few valid pixels ({len(y)}). Skipping {label}.")
            continue

        # --------------------------
        # Step 4: Define Bagging-Based Random Forest
        # --------------------------
        model = RandomForestRegressor(
            n_estimators=500,
            max_features="log2",
            min_samples_split=5,
            max_depth=10,
            min_samples_leaf=4,
            bootstrap=True,
            oob_score=True,
            random_state=42,
            n_jobs=-1
        )

        # --------------------------
        # Step 5: Train Model
        # --------------------------
        print(f"Training Random Forest for {label}...")
        start = time.time()
        model.fit(X, y)
        elapsed = time.time() - start
        print(f"Training complete in {elapsed:.2f} seconds.")

        # --------------------------
        # Step 6: Evaluate Model
        # --------------------------
        # NOTE: predictions are made on the same pixels used for fitting, so
        # R2 / RMSE / MAE below are in-sample scores; OOB_Score is the only
        # out-of-sample estimate recorded here.
        y_pred = model.predict(X)
        r2 = r2_score(y, y_pred)
        rmse = np.sqrt(mean_squared_error(y, y_pred))
        mae = mean_absolute_error(y, y_pred)
        oob = model.oob_score_

        # --------------------------
        # Step 7: Save Model
        # --------------------------
        model_file = os.path.join(output_folder, f"RF_{label}.pkl")
        joblib.dump(model, model_file)

        # --------------------------
        # Step 8: Save Feature Importances
        # --------------------------
        importance_df = pd.DataFrame({
            'Feature': predictor_names,
            'Importance': model.feature_importances_
        })
        importance_file = os.path.join(output_folder, f"Importance_{label}.csv")
        importance_df.to_csv(importance_file, index=False)

        # --------------------------
        # Step 9: Save Pearson Correlations
        # --------------------------
        # Correlation of each predictor column with LST over the valid pixels.
        corr_data = []
        for i, var in enumerate(predictor_names):
            corr, _ = pearsonr(X[:, i], y)
            corr_data.append({'Feature': var, 'PearsonR': corr})
        corr_df = pd.DataFrame(corr_data)
        corr_file = os.path.join(output_folder, f"Correlation_{label}.csv")
        corr_df.to_csv(corr_file, index=False)

        # --------------------------
        # Step 10: Save Summary Metrics
        # --------------------------
        results_list.append({
            'Year': year,
            'Month': month,
            'OOB_Score': oob,
            'R2': r2,
            'RMSE': rmse,
            'MAE': mae,
            'Train_Time_sec': elapsed
        })

# ===================================
# Final Summary CSV
# ===================================
# One row per successfully trained label.
summary_df = pd.DataFrame(results_list)
summary_file = os.path.join(output_folder, "Summary_Monthly_Results.csv")
summary_df.to_csv(summary_file, index=False)

print("\nMonthly model training completed. Summary saved to:")
print(summary_file)


# Seasonal model training (named seasons)

In [None]:
import os
import time
import joblib
import numpy as np
import pandas as pd
import rasterio
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import pearsonr
from collections import OrderedDict

# ===================================
# Configuration
# ===================================
# Input rasters are read from `data_folder`; every artefact written by this
# cell (models and CSV tables) goes to `output_folder`.
data_folder = '/content/drive/MyDrive/Taiwan/'  # edit to match your Drive layout
output_folder = '/content/drive/MyDrive/Model_Results'

# Years 2000..2025 inclusive.
years = [*range(2000, 2026)]
# Season names exactly as they appear in the raster file names.
seasons = ['Spring', 'Summer', 'Autumn', 'Winter']
# Raster file-name prefixes; this order is also the feature-column order.
predictor_names = ['NDVI', 'EVI', 'NDWI', 'LAI', 'ALB', 'ELV', 'SLP', 'DSR']

# Create the output directory up front (no error if it already exists).
os.makedirs(output_folder, exist_ok=True)

# ===================================
# Helper Function to Read Raster
# ===================================
def read_raster(raster_path):
    """Read band 1 of a raster as a float array with invalid pixels set to NaN.

    Pixels that the dataset flags as invalid (its declared nodata value or
    mask) are replaced by ``np.nan`` so that the ``np.isfinite`` filter applied
    to the stacked arrays drops them. Without this, a numeric nodata sentinel
    (for example -9999) would be treated as a real observation.

    Parameters
    ----------
    raster_path : str
        Path to a raster file readable by rasterio.

    Returns
    -------
    numpy.ndarray
        2-D floating-point array holding band 1.
    """
    with rasterio.open(raster_path) as src:
        data = src.read(1, masked=True)
    # NaN only exists for floating dtypes, so promote integer rasters first.
    if not np.issubdtype(data.dtype, np.floating):
        data = data.astype(np.float64)
    return data.filled(np.nan)

# ===================================
# Store All Results
# ===================================
results_list = []

# ===================================
# Main Loop over Years and Seasons
# ===================================
# One Random Forest is fitted per (year, season) label. A label is skipped,
# with a message, whenever an input raster is missing, the rasters do not share
# one grid shape, or fewer than 100 pixels are valid in every layer.
for year in years:
    for season in seasons:
        label = f"{year}_{season}"
        print(f"\nProcessing {label}...")

        # --------------------------
        # Step 1: Load Predictor Rasters
        # --------------------------
        predictors = OrderedDict()
        missing = False
        for var in predictor_names:
            path = os.path.join(data_folder, f"{var}_{label}.tif")
            if not os.path.exists(path):
                print(f"Missing file: {path}. Skipping {label}.")
                missing = True
                break
            predictors[var] = read_raster(path)
        if missing:
            continue

        # --------------------------
        # Step 2: Load LST Raster
        # --------------------------
        lst_path = os.path.join(data_folder, f"LST_{label}.tif")
        if not os.path.exists(lst_path):
            print(f"Missing LST file: {lst_path}. Skipping {label}.")
            continue
        lst = read_raster(lst_path)

        # Every layer must be on the same grid: flattening rasters of different
        # shapes would pair up unrelated pixels (or fail outright on stacking).
        mismatched = [var for var in predictor_names if predictors[var].shape != lst.shape]
        if mismatched:
            print(f"Shape mismatch with LST {lst.shape} for {mismatched}. Skipping {label}.")
            continue

        # --------------------------
        # Step 3: Stack Predictors and Target
        # --------------------------
        # One row per pixel, one column per predictor (order of predictor_names).
        feature_stack = np.column_stack([predictors[var].ravel() for var in predictor_names])
        lst_flat = lst.ravel()

        # Keep only pixels that are finite in every predictor and in LST.
        valid_mask = np.isfinite(feature_stack).all(axis=1) & np.isfinite(lst_flat)
        X = feature_stack[valid_mask]
        y = lst_flat[valid_mask]

        if len(y) < 100:
            print(f"Too few valid pixels ({len(y)}). Skipping {label}.")
            continue

        # --------------------------
        # Step 4: Define Bagging-Based Random Forest
        # --------------------------
        model = RandomForestRegressor(
            n_estimators=500,
            max_features="log2",
            min_samples_split=5,
            max_depth=10,
            min_samples_leaf=4,
            bootstrap=True,
            oob_score=True,
            random_state=42,
            n_jobs=-1
        )

        # --------------------------
        # Step 5: Train Model
        # --------------------------
        print(f"Training Random Forest for {label}...")
        start = time.time()
        model.fit(X, y)
        elapsed = time.time() - start
        print(f"Training complete in {elapsed:.2f} seconds.")

        # --------------------------
        # Step 6: Evaluate Model
        # --------------------------
        # NOTE: predictions are made on the same pixels used for fitting, so
        # R2 / RMSE / MAE below are in-sample scores; OOB_Score is the only
        # out-of-sample estimate recorded here.
        y_pred = model.predict(X)
        r2 = r2_score(y, y_pred)
        # The `squared=False` argument has been removed from recent
        # scikit-learn releases; taking the square root explicitly works on
        # every version.
        rmse = np.sqrt(mean_squared_error(y, y_pred))
        mae = mean_absolute_error(y, y_pred)
        oob = model.oob_score_

        # --------------------------
        # Step 7: Save Model
        # --------------------------
        model_file = os.path.join(output_folder, f"RF_{label}.pkl")
        joblib.dump(model, model_file)

        # --------------------------
        # Step 8: Save Feature Importances
        # --------------------------
        importance_df = pd.DataFrame({
            'Feature': predictor_names,
            'Importance': model.feature_importances_
        })
        importance_file = os.path.join(output_folder, f"Importance_{label}.csv")
        importance_df.to_csv(importance_file, index=False)

        # --------------------------
        # Step 9: Save Pearson Correlations
        # --------------------------
        # Correlation of each predictor column with LST over the valid pixels.
        corr_data = []
        for i, var in enumerate(predictor_names):
            corr, _ = pearsonr(X[:, i], y)
            corr_data.append({'Feature': var, 'PearsonR': corr})
        corr_df = pd.DataFrame(corr_data)
        corr_file = os.path.join(output_folder, f"Correlation_{label}.csv")
        corr_df.to_csv(corr_file, index=False)

        # --------------------------
        # Step 10: Save Metrics
        # --------------------------
        results_list.append({
            'Year': year,
            'Season': season,
            'OOB_Score': oob,
            'R2': r2,
            'RMSE': rmse,
            'MAE': mae,
            'Train_Time_sec': elapsed
        })

# ===================================
# Final Summary CSV
# ===================================
# One row per successfully trained label.
summary_df = pd.DataFrame(results_list)
summary_file = os.path.join(output_folder, "Summary_All_Results.csv")
summary_df.to_csv(summary_file, index=False)

print("\nAll training and saving completed. Summary saved to:")
print(summary_file)

