In [1]:
import os
import numpy as np
import pandas as pd
import joblib

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA


In [2]:
def flatten_feature_cube(x_pt, feature_names):
    """
    Converts x(p,t) cube to ML-ready DataFrame.
    Drops NaNs safely.

    Parameters
    ----------
    x_pt : object
        Container that can be indexed with a list of variable names and whose
        selection exposes ``to_dataframe()`` (presumably an ``xarray.Dataset``
        - confirm against ``build_feature_cube``).
    feature_names : iterable of str
        Names of the variables to keep. Any iterable is accepted (tuple, list, ...).

    Returns
    -------
    pandas.DataFrame
        One row per sample with the former index levels as ordinary columns,
        rows containing any NaN removed, and a fresh 0..n-1 index.

    Raises
    ------
    AssertionError
        If no row is left after the NaN rows are dropped.
    """
    # Always index with a list: a tuple is hashable, so xarray-style containers
    # look it up as ONE variable name and raise KeyError instead of selecting
    # several variables.
    selected = list(feature_names)

    df = x_pt[selected].to_dataframe().reset_index()
    df = df.dropna().reset_index(drop=True)

    assert len(df) > 0, "‚ùå No valid samples after flattening"
    return df


In [3]:
def flatten_feature_cube(x_pt, feature_names):
    """
    Flatten the selected variables of a feature cube into a tabular frame.

    The names in ``feature_names`` are copied into a list, used to index
    ``x_pt``, and the selection is converted with ``to_dataframe()``. Index
    levels become ordinary columns, every row holding a NaN is discarded,
    and the surviving rows are renumbered from zero.

    Raises ``AssertionError`` when no row survives the NaN filter.
    """
    # A list (not a tuple) is what selects several variables at once.
    wanted = list(feature_names)
    table = x_pt[wanted].to_dataframe()
    table = table.reset_index()

    complete_rows = table.dropna()
    complete_rows = complete_rows.reset_index(drop=True)

    assert len(complete_rows) > 0, "‚ùå No valid samples after flattening"
    return complete_rows

In [4]:
def train_baseline_model(
    x_pt,
    mine_id,
    feature_names=("NDVI", "NBR", "BSI", "B11", "B12"),
    n_components=3,
    model_dir="./baseline_models",
    store_scores=False   # keep the per-sample score array only on request
):
    """
    Trains a baseline PCA model for a single mine.
    Stores baseline anomaly score statistics.

    The selected features are flattened into a sample matrix, standardised,
    projected with PCA and reconstructed. The per-sample mean squared
    reconstruction error (in scaled feature space) is the baseline anomaly
    score. The fitted scaler, the fitted PCA and summary statistics of that
    score are bundled and written to ``<model_dir>/<mine_id>_baseline_model.joblib``.

    Parameters
    ----------
    x_pt : object
        Feature cube accepted by ``flatten_feature_cube``.
    mine_id : object
        Identifier stored in the bundle and interpolated into the file name.
    feature_names : iterable of str
        Variables used as model inputs; stored unchanged under ``"features"``.
    n_components : int
        Forwarded to ``sklearn.decomposition.PCA``.
    model_dir : str
        Output directory; created if it does not exist.
    store_scores : bool
        When True, the full per-sample score array is stored (as float32)
        under ``bundle["baseline_anomaly"]["scores"]``, e.g. for plotting a
        histogram. When False (default) only the summary statistics are kept,
        which keeps the bundle and the file on disk small.

    Returns
    -------
    dict
        The bundle that was saved: ``mine_id``, ``features``, ``scaler``,
        ``pca`` and ``baseline_anomaly`` (``mean``, ``p95``, ``p99``, ``max``,
        ``std`` and, only if requested, ``scores``).

    Raises
    ------
    AssertionError
        Propagated from ``flatten_feature_cube`` when no NaN-free sample exists.
    """

    print(f"\nüß† Training baseline model for mine: {mine_id}")

    # ------------------------------------------------------------------
    # 1. Flatten features
    # ------------------------------------------------------------------
    # Materialise the names once so exactly the same column list drives both
    # the cube selection and the matrix extraction.
    feature_list = list(feature_names)
    df = flatten_feature_cube(x_pt, feature_list)
    X = df[feature_list].values

    print(f"‚úî Training samples: {X.shape[0]}")
    print(f"‚úî Features used: {feature_names}")

    # ------------------------------------------------------------------
    # 2. Scale
    # ------------------------------------------------------------------
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # ------------------------------------------------------------------
    # 3. PCA training
    # ------------------------------------------------------------------
    pca = PCA(n_components=n_components, random_state=42)
    X_latent = pca.fit_transform(X_scaled)

    # ------------------------------------------------------------------
    # 4. Baseline reconstruction error
    # ------------------------------------------------------------------
    X_recon = pca.inverse_transform(X_latent)
    recon_error = np.mean((X_scaled - X_recon) ** 2, axis=1)

    # Compute every statistic exactly once so the printed diagnostics and the
    # persisted reference values cannot drift apart.
    baseline_stats = {
        "mean": float(recon_error.mean()),
        "p95": float(np.percentile(recon_error, 95)),
        "p99": float(np.percentile(recon_error, 99)),
        "max": float(recon_error.max()),
        "std": float(recon_error.std()),
    }

    # ------------------------------------------------------------------
    # 5. Diagnostics (printed)
    # ------------------------------------------------------------------
    print("\nüìä PCA Explained Variance Ratio:")
    for i, v in enumerate(pca.explained_variance_ratio_):
        print(f"  PC{i+1}: {v:.4f}")

    print(f"\nüìà Cumulative variance: {pca.explained_variance_ratio_.sum():.4f}")

    print("\nüß™ Baseline Anomaly Score Stats:")
    print(f"  Mean : {baseline_stats['mean']:.6f}")
    print(f"  Std  : {baseline_stats['std']:.6f}")
    print(f"  P95  : {baseline_stats['p95']:.6f}")
    print(f"  P99  : {baseline_stats['p99']:.6f}")
    print(f"  Max  : {baseline_stats['max']:.6f}")

    # ------------------------------------------------------------------
    # 6. Persist model + baseline reference
    # ------------------------------------------------------------------
    os.makedirs(model_dir, exist_ok=True)

    # Optional: store full distribution (large). Previously the array was
    # written unconditionally, which made the store_scores flag a no-op.
    if store_scores:
        baseline_stats["scores"] = recon_error.astype("float32")

    model_bundle = {
        "mine_id": mine_id,
        "features": feature_names,
        "scaler": scaler,
        "pca": pca,

        # Baseline anomaly reference used to judge later scores
        "baseline_anomaly": baseline_stats,
    }

    model_path = os.path.join(
        model_dir, f"{mine_id}_baseline_model.joblib"
    )
    joblib.dump(model_bundle, model_path)

    print("\nüíæ Baseline model saved to:")
    print(f"   {model_path}")

    return model_bundle


In [5]:
import ipynb
from ipynb.fs.defs.featurecube import build_feature_cube

In [6]:
# Relative path handed to build_feature_cube as its data directory
# (presumably the baseline-period inputs for mine01 - confirm).
DATA_DIR  = "content/mine01/baseline"

In [8]:
# Area of interest passed to build_feature_cube: a GeoJSON-style Polygon with a
# single closed ring (the first and last vertices are identical).
# NOTE(review): vertices look like [longitude, latitude] in degrees - confirm
# the coordinate order/CRS expected by build_feature_cube.
AOI = {"type":"Polygon","coordinates":[[[81.1540853393247,21.192409387913486],[81.1540573545225,21.19240716153938],[81.15402929421587,21.192408272781766],[81.15400160916909,21.192412703789593],[81.1539747441181,21.192420383382466],[81.15394913062669,21.19243118819471],[81.15392518015352,21.19244494465619],[81.15390327744277,21.192461431781368],[81.15388377434329,21.19248038471881],[81.15386698415662,21.19250149900575],[81.153853176604,21.192524435459198],[81.1538425734937,21.192548825624286],[81.15383534515738,21.192574277693538],[81.15383160771387,21.192600382800325],[81.15383142120422,21.192626721587565],[81.15383478862628,21.192652870944112],[81.15402876354473,21.193632514653288],[81.1533611590414,21.19403782960822],[81.15333861820847,21.194053498997295],[81.15331836942244,21.194071714068702],[81.15330073732551,21.194092182787067],[81.15328600460752,21.19411457698494],[81.15327440747441,21.194138537624458],[81.15326613186016,21.194163680552997],[81.15326131044677,21.194189602662878],[81.15326002053592,21.19421588835357],[81.15326228281037,21.194242116195422],[81.15353860668199,21.19602838075773],[81.15354494409702,21.196055985653967],[81.15355521340598,21.196082530018717],[81.15356922500995,21.196107523766145],[81.15358672021567,21.196130505439243],[81.15498677363634,21.197727820950835],[81.15500178256848,21.197743287327015],[81.1550183730142,21.197757260784606],[81.15641842754856,21.198822128399758],[81.15644127733951,21.198837365806295],[81.15646599591159,21.198849775462442],[81.15649218755777,21.198859158708],[81.1565194329891,21.198865365331304],[81.15654729604701,21.198868295973522],[81.15657533068526,21.19886790371947],[81.15660308811104,21.19886419484857],[81.15663012396931,21.198857228734308],[81.15710909036616,21.198702651613694],[81.15713480644565,21.198692616069653],[81.15715897695071,21.198679634496592],[81.1571812200086,21.19866391199205],[81.1572011841989,21.198645696958035],[81.15721855410575,21.19862527717617],[81.15723305530092,21.19860297526119]
,[81.15724445867978,21.198579143563595],[81.1572525840805,21.19855415860283],[81.1572573031308,21.19852841511904],[81.15731256771447,21.198047508056423],[81.15731376802916,21.19801981959663],[81.15731102938082,21.19799222753715],[81.15730440046829,21.19796522250072],[81.15729399916421,21.197939284671502],[81.15728001041894,21.19791487525728],[81.15726268297182,21.197892428288284],[81.15700347531791,21.19759988222633],[81.15806578245038,21.195521579843607],[81.15807605075916,21.195497836494543],[81.1580831254837,21.19547308859064],[81.15808689908317,21.195447712322707],[81.15808731419735,21.195422093433297],[81.15808436451765,21.195396621352586],[81.15807809488368,21.195371683279113],[81.15806860060098,21.195347658293823],[81.15805602599261,21.195324911597563],[81.15645333057125,21.192817257521707],[81.15643749203588,21.192795685673932],[81.15641888609171,21.19277615349095],[81.1563978082765,21.19275897122183],[81.15637459339109,21.19274441178986],[81.15634961018111,21.19273270645689],[81.15632325547985,21.192724041150285],[81.15629594790525,21.19271855350981],[81.1540853393247,21.192409387913486]]]}

In [9]:
# Build the feature cube for the AOI from the files under DATA_DIR.
# build_feature_cube is imported from the `featurecube` notebook via ipynb.
x_pt = build_feature_cube(DATA_DIR, AOI)
# Bare expression so the notebook renders the resulting object.
x_pt



In [10]:
mine_id = "MINE_001"   # choose your mine identifier

# Ask for the per-sample baseline scores explicitly: the training function
# notes they are kept for plotting a histogram, so the request should not
# depend on how the function treats its default.
baseline_model = train_baseline_model(
    x_pt=x_pt,
    mine_id=mine_id,
    store_scores=True,
)



üß† Training baseline model for mine: MINE_001
‚úî Training samples: 99792
‚úî Features used: ('NDVI', 'NBR', 'BSI', 'B11', 'B12')

üìä PCA Explained Variance Ratio:
  PC1: 0.5295
  PC2: 0.3494
  PC3: 0.1068

üìà Cumulative variance: 0.9857

üß™ Baseline Anomaly Score Stats:
  Mean : 0.014296
  Std  : 0.034661
  P95  : 0.066372
  P99  : 0.128322
  Max  : 1.116392

üíæ Baseline model saved to:
   ./baseline_models/MINE_001_baseline_model.joblib
