### PREREQUISITES - Set base_dir

In [None]:
import os
import sys

# Choose the project root depending on where the notebook is executing.
if 'google.colab' not in sys.modules:
    # Local run: an empty prefix leaves every path relative to the working directory.
    base_dir = ""  # Adjust as needed
    print(f"🖥️ Running locally, base_dir set to: {base_dir}")
else:
    # Colab run: Google Drive is mounted first, then the workshop folder on it is used.
    from google.colab import drive
    drive.mount('/content/drive')
    base_dir = "/content/drive/MyDrive/WildFire_RemoteSensing_workshop/WildFire_RemoteSensing/"
    print(f"📂 Running in Colab, base_dir set to: {base_dir}")


You should have most of the dependencies already, except for:

In [None]:
!pip install tensorflow

# Import libraries & Define functions

- Import necessary libraries.
- Define steps similar to the process in the previous lesson, 02_ML_classification_models.
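  - `extract_pixels_and_labels()`: extracts pixel-level features from one datacube file and assigns every sampled pixel the label it is given — in this notebook the fire's burned area (`AREA_HA`), rather than the 0 = "before fire" / 1 = "after fire" classes used in the previous lesson.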
  - `build_dataset()`: loops through files, extracts features, and stacks them into `X` and `y`.
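  - Saving: the resulting dataset is written to a CSV for reuse (done inline with `DataFrame.to_csv()` in this notebook, rather than through a separate `save_dataset_to_csv()` helper).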

In [None]:
import xarray as xr
import numpy as np
from tqdm import tqdm
import geopandas as gpd
import pandas as pd

In [None]:
# Load the fire records table; its `id` and `AREA_HA` columns drive the dataset build.
fire_records_path = f"{base_dir}effis_layer/fire_records.csv"
gdf = gpd.read_file(fire_records_path)
gdf

In [None]:
# Upper bound on the number of pixels sampled from each datacube file.
max_pixels_per_file = 10_000
# Folder (under base_dir) that holds the per-fire datacube files.
data_dir = f"{base_dir}datacubes_2024"
def extract_pixels_and_labels(file_path, label, max_pixels):
    """Sample labelled per-pixel band vectors from one datacube file.

    The file is opened with xarray, restricted to the numeric Sentinel-2 style
    bands listed below, reduced to the first time step when a ``t`` dimension
    exists, and flattened so that each row is one pixel and each column one
    band. Rows containing any NaN are dropped, the remaining rows are shuffled
    with NumPy's global random state, and at most ``max_pixels`` are returned.

    Parameters
    ----------
    file_path : str
        Path passed to ``xr.open_dataset``.
    label : scalar
        Value assigned to every returned pixel.
    max_pixels : int
        Maximum number of pixel rows to return.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        ``(pixels, labels)`` where ``pixels`` is a float32 array with one
        column per available band and ``labels`` holds ``label`` once per
        returned row. If no desired band is present, an empty
        ``(0, len(desired_bands))`` array and an empty label array are returned.
    """
    desired_bands = [
        'B01', 'B02', 'B03', 'B04', 'B05', 'B06',
        'B07', 'B08', 'B8A', 'B09', 'B11', 'B12'
    ]

    # The context manager closes the file on every exit path (early return or
    # exception); previously the handle stayed open for each processed fire.
    with xr.open_dataset(file_path) as ds:
        available_bands = [
            band for band in desired_bands
            if band in ds.data_vars and np.issubdtype(ds[band].dtype, np.number)
        ]

        if not available_bands:
            print(f"⚠️ No valid bands found in {file_path}, skipping.")
            return np.empty((0, len(desired_bands))), np.array([])

        subset = ds[available_bands]

        # Keep only the first time step when the cube carries a time dimension.
        if "t" in subset.dims:
            subset = subset.isel(t=0)

        da = subset.to_array().transpose("y", "x", "variable")

        # `.values` materialises the data in memory, so it must run while the
        # file is still open. The last axis (bands) becomes the columns.
        pixels = da.values.reshape(-1, da.shape[2]).astype(np.float32, copy=False)

    # Discard pixels with a missing value in any band.
    pixels = pixels[~np.isnan(pixels).any(axis=1)]

    np.random.shuffle(pixels)
    selected = pixels[:max_pixels]
    # Size the labels from the slice actually returned so X and y always align.
    return selected, np.full(len(selected), label)
def build_dataset(gdf, max_pixels):
    """Assemble the pixel feature matrix and target vector for all fires.

    For every record in ``gdf`` the file ``fire_<id>_before.nc`` inside the
    module-level ``data_dir`` is read with ``extract_pixels_and_labels`` and
    each sampled pixel is labelled with that record's ``AREA_HA`` value.

    Parameters
    ----------
    gdf : DataFrame-like
        Table with at least the columns ``id`` and ``AREA_HA``.
    max_pixels : int
        Maximum number of pixels sampled per file.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        ``(X, y)``: the per-file pixel arrays stacked row-wise and the
        per-file label arrays concatenated.

    Raises
    ------
    ValueError
        If ``gdf`` has no rows.
    """
    X, y = [], []
    # Iterate the two columns directly instead of `iterrows()`: iterrows does
    # not preserve dtypes across a row, so an integer id could be rendered as
    # "12.0" in the file name. This also avoids shadowing the builtin `id`.
    # NOTE(review): AREA_HA is passed through unchanged; if the table was read
    # from CSV it may arrive as text — confirm.
    for fire_id, label in zip(gdf['id'], gdf['AREA_HA']):
        file_path = data_dir + '/' + f"fire_{fire_id}_before.nc"
        pixels, labels = extract_pixels_and_labels(file_path, label, max_pixels)
        X.append(pixels)
        y.append(labels)

    if not X:
        # np.vstack([]) would raise a less informative ValueError.
        raise ValueError("No fire records to process: `gdf` is empty.")
    return np.vstack(X), np.hstack(y)
 
 
# Build the pixel table, name the feature columns band_1..band_N, and persist it as CSV.
X, y = build_dataset(gdf, max_pixels_per_file)
band_columns = [f"band_{n}" for n in range(1, X.shape[1] + 1)]
df = pd.DataFrame(X, columns=band_columns)
df["label"] = y
df.to_csv(f"{base_dir}DATA/pixel_dataset_ba.csv", index=False)
print("✅ Dataset saved.")

## Load the created `csv` and use it for analysis.

In [None]:
# Reload the saved pixel table and keep only rows where every column (bands and label) is >= 0.
df = pd.read_csv(f"{base_dir}DATA/pixel_dataset_ba.csv")
non_negative_rows = (df >= 0).all(axis=1)
df = df[non_negative_rows]
df.describe()

Split and scale the data.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate the regression target from the band features.
y = df["label"].values
X = df.drop(columns="label").values

# Hold out 20% of the pixels for testing, with a fixed seed.
# NOTE(review): `stratify=y` treats each distinct label value as a class; with a
# continuous target this balances the split per distinct burned-area value —
# confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit the scaler on the training split only, then apply the same transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print("✅ Data loaded and split:")
print(f"  Train shape: {X_train.shape}")
print(f"  Test shape: {X_test.shape}")


### Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# Baseline: ordinary least squares on the standardized band features.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict once; the same array feeds both the metrics and the scatter plot,
# so a second identical `predict` call is unnecessary.
y_pred = model.predict(X_test)
predictions = y_pred

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("-" * 40)
print(f"  R² score: {r2:.4f}")
print(f"  MSE:  {mse:.4f}")
print(f"  MAE:  {mae:.4f}")

# Predicted vs. true values for the held-out pixels.
plt.scatter(y_test, predictions, alpha=0.5)
plt.xlabel("True Burned Area")
plt.ylabel("Predicted Burned Area")
plt.title("Prediction vs Ground Truth")
plt.grid(True)
plt.show()

### Polynomial Regression

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
 
# Expand the scaled features with degree-2 polynomial terms (no constant column).
# The expansion is fitted on the training split and reused for the test split.
poly = PolynomialFeatures(degree=2, include_bias=False)
poly.fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

# Linear model on the expanded feature space.
model = LinearRegression().fit(X_train_poly, y_train)

y_pred = model.predict(X_test_poly)
r2 = r2_score(y_test, y_pred)
print(f"Polynomial Regression R² score: {r2:.4f}")

# Predicted vs. true values for the held-out pixels.
plt.scatter(y_test, y_pred, alpha=0.5)
plt.title("Prediction vs Ground Truth")
plt.xlabel("True Burned Area")
plt.ylabel("Predicted Burned Area")
plt.grid(True)
plt.show()

### MLP Regression

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

# Three hidden layers of 100 units, trained with Adam for at most 50 iterations.
# `learning_rate='adaptive'` was removed: scikit-learn only uses that option
# when solver='sgd', so with Adam it had no effect and was misleading.
# `random_state` makes weight initialisation and batching reproducible, matching
# the fixed seed already used for the train/test split.
mlpr = MLPRegressor(
    max_iter=50,
    hidden_layer_sizes=(100, 100, 100),
    solver='adam',
    random_state=42,
)

mlpr.fit(X_train, y_train)

# Predict once and reuse the result for the metric and the plot.
predicted = mlpr.predict(X_test)
parameters = mlpr.get_params()
test_mse = mean_squared_error(y_test, predicted)
print(parameters)
print(test_mse)

predictions = predicted

# Predicted vs. true values for the held-out pixels.
plt.scatter(y_test, predictions, alpha=0.5)
plt.xlabel("True Burned Area")
plt.ylabel("Predicted Burned Area")
plt.title("Prediction vs Ground Truth")
plt.grid(True)
plt.show()

In [None]:
# Re-draw the MLP's test-set predictions against the true values.
predictions = mlpr.predict(X_test)

plt.scatter(x=y_test, y=predictions, alpha=0.5)
plt.title("Prediction vs Ground Truth")
plt.xlabel("True Burned Area")
plt.ylabel("Predicted Burned Area")
plt.grid(True)
plt.show()

### Neural Network Regression

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt


# Take the input width from the data instead of hard-coding 12, so the network
# still builds if the feature matrix has a different number of band columns.
n_features = X_train.shape[1]

# Small fully connected regressor: two 64-unit ReLU layers, dropout, one linear output.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(n_features,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1)
])

# Optimise mean squared error and additionally track mean absolute error.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# 20% of the training split is held back by Keras as a validation set.
history = model.fit(
    X_train , y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=32,
    verbose=1
)

# `evaluate` returns the loss followed by the compiled metrics (here: MAE).
loss, mae = model.evaluate(X_test, y_test)
print(f"\n✅ Test MAE: {mae:.2f}")

# Training vs. validation loss per epoch.
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.legend()
plt.title("Training History")
plt.show()

# The model outputs one column per sample; flatten it to 1-D for plotting.
predictions = model.predict(X_test).flatten()

plt.scatter(y_test, predictions, alpha=0.5)
plt.xlabel("True Burned Area")
plt.ylabel("Predicted Burned Area")
plt.title("Prediction vs Ground Truth")
plt.grid(True)
plt.show()
