In [None]:
import os
import sys

# Pick the project root depending on where the notebook is executing:
# Google Colab (Drive gets mounted) or a plain local Python session.
running_in_colab = 'google.colab' in sys.modules

if not running_in_colab:
    # Local run: later cells prepend base_dir to relative paths, so an empty
    # string means "relative to the current working directory".
    base_dir = ""  # Adjust as needed
    print(f"🖥️ Running locally, base_dir set to: {base_dir}")
else:
    # Colab run: mount Google Drive and point at the workshop folder on it.
    from google.colab import drive
    drive.mount('/content/drive')
    base_dir = "/content/drive/MyDrive/WildFire_RemoteSensing_workshop/WildFire_RemoteSensing/"
    print(f"📂 Running in Colab, base_dir set to: {base_dir}")


In [None]:
import geopandas as gpd
import pandas as pd
# Load the fire records table (a CSV file, read through GeoPandas)
 

import pandas as pd
 
# `base_dir` is defined by the environment-detection cell at the top of the
# notebook. It is deliberately NOT reassigned here: resetting it to "" would
# discard the Google Drive path chosen when running in Colab and make every
# later path resolve against the wrong directory.
#
# No filtering happens in this cell; the complete records table is loaded.
# NOTE(review): the records are a CSV read through GeoPandas, so columns such
# as 'id' and 'AREA_HA' may come back as strings - verify dtypes before use.
gdf = gpd.read_file(base_dir + "effis_layer/fire_records.csv")
gdf

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
 
from tqdm import tqdm
import pandas as pd
# Upper bound on the number of pixels sampled from each datacube file.
max_pixels_per_file = 10_000
# Folder holding the per-fire NetCDF datacubes, located under base_dir.
data_dir = f"{base_dir}datacubes_2024"
def extract_pixels_and_labels(file_path, label, max_pixels):
    """Sample per-pixel Sentinel-2 band vectors from one NetCDF datacube.

    The file is opened with ``xarray``; only the wanted band variables that
    are present and numeric are kept. If the dataset has a ``t`` dimension,
    the first time step is used. The bands are stacked into a
    ``(y, x, band)`` array, flattened to one row per pixel, rows containing
    any NaN are dropped, the rows are shuffled with NumPy's global RNG
    (seed it with ``np.random.seed`` for reproducible samples) and at most
    ``max_pixels`` rows are returned.

    Parameters
    ----------
    file_path : str
        Path of the NetCDF file to read.
    label : scalar
        Value repeated once for every returned pixel.
    max_pixels : int
        Maximum number of pixel rows to return.

    Returns
    -------
    pixels : np.ndarray
        ``float32`` array with one row per sampled pixel and one column per
        available band (columns follow the order of the wanted-band list).
        Shape is ``(0, 12)`` when no wanted band is usable.
    labels : np.ndarray
        1-D array with ``label`` repeated ``len(pixels)`` times (empty when
        no wanted band is usable).
    """
    # Sentinel-2 bands we want as features, in output-column order.
    desired_bands = [
        'B01', 'B02', 'B03', 'B04', 'B05', 'B06',
        'B07', 'B08', 'B8A', 'B09', 'B11', 'B12'
    ]

    # The context manager guarantees the file handle is released even on an
    # early return or an exception; the original never closed the dataset,
    # leaking one open handle per processed datacube.
    with xr.open_dataset(file_path) as ds:
        # Keep only wanted bands that exist in this file and hold numbers.
        available_bands = [
            band for band in desired_bands
            if band in ds.data_vars and np.issubdtype(ds[band].dtype, np.number)
        ]

        if not available_bands:
            print(f"⚠️ No valid bands found in {file_path}, skipping.")
            # Same float32 dtype as the normal path so stacking results from
            # several files does not silently upcast everything to float64.
            return np.empty((0, len(desired_bands)), dtype=np.float32), np.array([])

        band_ds = ds[available_bands]

        # Select the first time slice if the cube has a time dimension.
        if "t" in band_ds.dims:
            band_ds = band_ds.isel(t=0)

        # Stack the band variables into one array laid out as (y, x, band).
        da = band_ds.to_array().transpose("y", "x", "variable")

        # `.values` pulls the data into memory, so it must happen while the
        # file is still open. One row per pixel, one column per band.
        pixels = da.values.reshape(-1, da.shape[2]).astype(np.float32, copy=False)

    # Drop pixels where any band is missing.
    pixels = pixels[~np.isnan(pixels).any(axis=1)]

    # Random subset: shuffle the rows in place, then keep the first ones.
    np.random.shuffle(pixels)
    sampled = pixels[:max_pixels]
    # Derive the label count from the slice itself so both outputs always
    # have matching lengths.
    return sampled, np.full(len(sampled), label)
def build_dataset(gdf, max_pixels):
    """Assemble the pixel-level feature matrix and target vector.

    For every row of ``gdf`` the datacube ``fire_<id>_before.nc`` inside the
    module-level ``data_dir`` is sampled with ``extract_pixels_and_labels``,
    using the row's ``AREA_HA`` value as the label of every sampled pixel.

    Parameters
    ----------
    gdf : DataFrame-like
        Table providing ``iterrows()`` with ``id`` and ``AREA_HA`` columns.
    max_pixels : int
        Maximum number of pixels to sample from each datacube.

    Returns
    -------
    X : np.ndarray
        Sampled pixels from all files stacked row-wise.
    y : np.ndarray
        Label of each row in ``X``.

    Raises
    ------
    ValueError
        If no datacube could be read (empty table or every file missing).
    """
    X, y = [], []
    for _, row in gdf.iterrows():
        fire_id = row['id']  # renamed from `id`, which shadowed the builtin
        file_path = os.path.join(data_dir, f"fire_{fire_id}_before.nc")

        # A single missing datacube used to abort the whole build with a
        # FileNotFoundError; report it and carry on, mirroring the
        # warn-and-skip policy of extract_pixels_and_labels.
        if not os.path.exists(file_path):
            print(f"⚠️ Datacube not found: {file_path}, skipping.")
            continue

        # Regression target: the AREA_HA value of this record, repeated for
        # every pixel sampled from its datacube.
        label = row['AREA_HA']
        pixels, labels = extract_pixels_and_labels(file_path, label, max_pixels)
        X.append(pixels)
        y.append(labels)

    if not X:
        # np.vstack([]) would raise an opaque "need at least one array"
        # ValueError; fail with the same exception type but a clear cause.
        raise ValueError(f"No datacubes could be read from {data_dir!r}")

    return np.vstack(X), np.hstack(y)
 
 
# Sample pixels from every datacube, then persist them as one flat table.
X, y = build_dataset(gdf, max_pixels_per_file)

# Feature columns are numbered band_1..band_N by position; the per-pixel
# target goes into a trailing "label" column.
band_columns = [f"band_{k}" for k in range(1, X.shape[1] + 1)]
df = pd.DataFrame(X, columns=band_columns).assign(label=y)

df.to_csv(base_dir+'DATA/pixel_dataset_ba.csv', index=False)
print("✅ Dataset saved   ")

<h2>Read the pixel dataset from file</h2>

In [None]:
import pandas as pd
# Reload the pixel table written by the extraction step.
pixel_csv = base_dir+"DATA/pixel_dataset_ba.csv"
df = pd.read_csv(pixel_csv)

# Noise filter: keep a row only if every column (bands and label) is >= 0.
# Rows holding a NaN are dropped as well, since NaN >= 0 evaluates to False.
non_negative_rows = df.ge(0).all(axis=1)
df = df[non_negative_rows]

df.describe()

In [None]:
from sklearn.model_selection import train_test_split



# Feature matrix = every column except the target; target = "label".
feature_frame = df.drop(columns="label")
X = feature_frame.values
y = df["label"].values

# Hold out 20% of the pixels for testing, with a fixed seed.
# NOTE(review): `stratify=y` treats each distinct label value as a class.
# Labels are assigned per datacube file upstream, so pixels of the same file
# end up in both train and test - confirm this is intended, as it may leak
# information between the splits (a group-wise split would avoid it).
split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

from sklearn.preprocessing import StandardScaler
# Standardise features: statistics are learned on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


print("✅ Data loaded and split:")
print(f"  Train shape: {X_train.shape}")
print(f"  Test shape: {X_test.shape}")


<h2>Linear Regression</h2>

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
 
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# === Linear Regression ===
# Ordinary least squares on the standardised band features.
# (X_train / X_test / y_train / y_test come from the split cell above.)
model = LinearRegression()
model.fit(X_train, y_train)

# === Predict and Evaluate ===
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

separator = "-" * 40
print(separator)
print(f"  R² score: {r2:.4f}")
print(f"  MSE:  {mse:.4f}")
print(f"  MAE:  {mae:.4f}")

predictions = model.predict(X_test)

# Scatter of predicted against true target values for the test split.
fig, ax = plt.subplots()
ax.scatter(y_test, predictions, alpha=0.5)
ax.set_xlabel("True Burned Area")
ax.set_ylabel("Predicted Burned Area")
ax.set_title("Prediction vs Ground Truth")
ax.grid(True)
plt.show()

<h2> Polynomial Regression </h2>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
 
# === Generate polynomial features ===
# Degree-2 expansion of the scaled features, without the constant column.
# The expansion is fitted on the training split and reused for the test split.
poly = PolynomialFeatures(degree=2, include_bias=False)
poly.fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

# === Fit linear regression on polynomial features ===
model = LinearRegression()
model.fit(X_train_poly, y_train)

# === Predict and evaluate ===
y_pred = model.predict(X_test_poly)
r2 = r2_score(y_test, y_pred)
print(f"Polynomial Regression R² score: {r2:.4f}")

# Scatter of predicted against true target values for the test split.
# (`plt` is imported by the linear-regression cell.)
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, alpha=0.5)
ax.set_xlabel("True Burned Area")
ax.set_ylabel("Predicted Burned Area")
ax.set_title("Prediction vs Ground Truth")
ax.grid(True)
plt.show()

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

# Multi-layer perceptron regressor: three hidden layers of 100 units each.
# - `learning_rate='adaptive'` was dropped: scikit-learn only consults that
#   setting when solver='sgd', so with 'adam' it had no effect and merely
#   suggested a schedule that was never applied.
# - `random_state` is pinned (same seed as the train/test split) so weight
#   initialisation and batch shuffling are reproducible between runs.
# - `max_iter=50` is kept from the original; NOTE(review): the optimiser may
#   stop before converging at this budget - raise it if a ConvergenceWarning
#   is emitted.
mlpr = MLPRegressor(
    hidden_layer_sizes=(100, 100, 100),
    solver='adam',
    max_iter=50,
    random_state=42,
)

mlpr.fit(X_train, y_train)
predicted = mlpr.predict(X_test)
parameters = mlpr.get_params()
test_mse = mean_squared_error(y_test, predicted)
print(parameters)
print(test_mse)

# Reuse the predictions computed above instead of running inference on the
# same test matrix a second time.
predictions = predicted

# Scatter of predicted against true target values for the test split.
plt.scatter(y_test, predictions, alpha=0.5)
plt.xlabel("True Burned Area")
plt.ylabel("Predicted Burned Area")
plt.title("Prediction vs Ground Truth")
plt.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Predict on the held-out split with the fitted MLP regressor.
predictions = mlpr.predict(X_test)

# Predicted vs. true scatter for the MLP; this repeats the figure drawn at
# the end of the previous cell.
fig, ax = plt.subplots()
ax.scatter(y_test, predictions, alpha=0.5)
ax.set_xlabel("True Burned Area")
ax.set_ylabel("Predicted Burned Area")
ax.set_title("Prediction vs Ground Truth")
ax.grid(True)
plt.show()

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

 
# === MODEL ===
# Size the input layer from the training matrix instead of hard-coding 12:
# the extraction step keeps only the bands that are available, so the number
# of feature columns is not guaranteed to be 12 and a fixed width would make
# `fit` fail with a shape mismatch.
n_features = X_train.shape[1]

# Small fully connected regressor: two 64-unit ReLU layers, dropout, and a
# single linear output unit.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(n_features,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1)  # Linear output for regression
])

# Optimise mean squared error; also track mean absolute error.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# === TRAINING ===
# 20% of the training split is set aside by Keras for validation.
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=10,
    batch_size=32,
    verbose=1
)

# === EVALUATE ===
# evaluate() returns the loss followed by the compiled metrics (here: MAE).
loss, mae = model.evaluate(X_test, y_test)
print(f"\n✅ Test MAE: {mae:.2f}")

# === PLOT TRAINING LOSS ===
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.legend()
plt.title("Training History")
plt.show()

# === PREDICT ===
# predict() yields one column per output unit; flatten to a 1-D vector.
predictions = model.predict(X_test).flatten()

# Scatter of predicted against true target values for the test split.
plt.scatter(y_test, predictions, alpha=0.5)
plt.xlabel("True Burned Area")
plt.ylabel("Predicted Burned Area")
plt.title("Prediction vs Ground Truth")
plt.grid(True)
plt.show()
