<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [2]</a>'.</span>

# Vegetation Growth Predictor
## VIGIL Risk Planning - ML Model 2 of 4

This notebook trains a Random Forest model to predict vegetation growth and encroachment timing:
- Species-specific growth rates
- Seasonal growth patterns
- Environmental factors (fire threat district and region; moisture and temperature are not currently model features)
- Days until conductor contact

**Output:** Predictions saved to `ML.VEGETATION_GROWTH_PREDICTION`

In [1]:
# Import libraries
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, r2_score
from snowflake.ml.modeling.ensemble import RandomForestRegressor
from snowflake.ml.modeling.pipeline import Pipeline
from snowflake.ml.modeling.preprocessing import StandardScaler, OneHotEncoder
from snowflake.ml.registry import Registry
from snowflake.snowpark import Session
from snowflake.snowpark.functions import col, lit, when, datediff, current_date, month

  from .autonotebook import tqdm as notebook_tqdm


<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [2]:
# Obtain the Snowpark session used by every later cell.
# NOTE(review): the recorded papermill run stopped in this cell because the
# default connection name could not be found in the local connection
# configuration — confirm that configuration before re-running.
session = Session.builder.getOrCreate()

# Select the database, schema and warehouse the rest of the notebook runs in.
# The schema is selected after the database it is expected to live in.
session.use_database("RISK_PLANNING_DB")
session.use_schema("ML")
session.use_warehouse("RISK_ML_WH")

connected_account = session.get_current_account()
print(f"Connected to: {connected_account}")

Error: Default connection with name 'demo' cannot be found, known ones are ['my_snowflake']

In [None]:
# Load vegetation encroachment records joined to their asset (for REGION).
# Only rows with a non-null GROWTH_RATE_FT_YEAR are kept. The query also
# derives DETECTION_MONTH from DETECTED_DATE and a SEASONAL_FACTOR multiplier.
# NOTE(review): SEASONAL_FACTOR is computed from CURRENT_DATE(), not from
# DETECTED_DATE, so every row in a given run gets the same value — confirm
# that this is intended before relying on it as a model feature.
VEGETATION_QUERY = """
    SELECT 
        v.ENCROACHMENT_ID,
        v.ASSET_ID,
        v.SPECIES,
        v.CURRENT_CLEARANCE_FT,
        v.REQUIRED_CLEARANCE_FT,
        v.GROWTH_RATE_FT_YEAR,
        v.DAYS_TO_CONTACT,
        v.FIRE_THREAT_DISTRICT,
        a.REGION,
        MONTH(v.DETECTED_DATE) as DETECTION_MONTH,
        -- Seasonal growth factor (spring/summer = high growth)
        CASE 
            WHEN MONTH(CURRENT_DATE()) IN (3,4,5,6) THEN 1.5  -- Spring: peak growth
            WHEN MONTH(CURRENT_DATE()) IN (7,8) THEN 1.2      -- Summer: moderate growth
            WHEN MONTH(CURRENT_DATE()) IN (9,10,11) THEN 0.8  -- Fall: slowing
            ELSE 0.3                                           -- Winter: minimal
        END as SEASONAL_FACTOR
    FROM ATOMIC.VEGETATION_ENCROACHMENT v
    JOIN ATOMIC.ASSET a ON v.ASSET_ID = a.ASSET_ID
    WHERE v.GROWTH_RATE_FT_YEAR IS NOT NULL
"""
veg_df = session.sql(VEGETATION_QUERY)

# Report how many rows came back, then preview the first five.
training_sample_count = veg_df.count()
print(f"Training samples: {training_sample_count:,}")
veg_df.show(5)

In [None]:
# Define features and target
# CATEGORICAL_FEATURES are encoded and NUMERIC_FEATURES are scaled by the
# pipeline; TARGET is the regression label.
CATEGORICAL_FEATURES = ["SPECIES", "FIRE_THREAT_DISTRICT", "REGION"]
NUMERIC_FEATURES = ["CURRENT_CLEARANCE_FT", "REQUIRED_CLEARANCE_FT", "DETECTION_MONTH", "SEASONAL_FACTOR"]
TARGET = "DAYS_TO_CONTACT"

# Supervised training and evaluation need a known label. The load query only
# filters on GROWTH_RATE_FT_YEAR, so rows with a NULL target are dropped here.
# veg_df itself is left untouched so predictions can still be generated for
# every record later on.
labeled_df = veg_df.filter(col(TARGET).is_not_null())

# Train/test split (80/20, fixed seed so the split is repeatable)
train_df, test_df = labeled_df.random_split([0.8, 0.2], seed=42)
print(f"Training set: {train_df.count():,} rows")
print(f"Test set: {test_df.count():,} rows")

In [None]:
# Build ML Pipeline with Random Forest
#
# Steps:
#   1. encoder - one-hot encodes each categorical feature
#   2. scaler  - standardises each numeric feature into a new *_SCALED column
#   3. model   - random forest regressor predicting TARGET
#
# The preprocessors need one output column per input column, so the output
# names are derived from the input names instead of a single shared name.
encoded_cols = [f"{name}_OHE" for name in CATEGORICAL_FEATURES]
scaled_cols = [f"{name}_SCALED" for name in NUMERIC_FEATURES]

# The one-hot step emits one column per category, so the final feature names
# are only known after fitting. The model therefore infers its input columns,
# and every raw column (IDs, growth rate, un-encoded categoricals, un-scaled
# numerics) is passed through untouched so it is neither used as a feature
# nor lost from the prediction output.
passthrough_cols = [name for name in train_df.columns if name != TARGET]

pipeline = Pipeline(
    steps=[
        ("encoder", OneHotEncoder(
            input_cols=CATEGORICAL_FEATURES,
            output_cols=encoded_cols,
            # Keep the raw categorical columns: SPECIES is selected again
            # when the predictions are written out.
            drop_input_cols=False,
            # A category present only in the test/scoring data must not abort
            # prediction; it is encoded as all zeros instead.
            handle_unknown="ignore",
        )),
        ("scaler", StandardScaler(input_cols=NUMERIC_FEATURES, output_cols=scaled_cols)),
        ("model", RandomForestRegressor(
            label_cols=[TARGET],
            output_cols=["PREDICTED_DAYS_TO_CONTACT"],
            passthrough_cols=passthrough_cols,
            n_estimators=100,
            max_depth=10,
            min_samples_leaf=5,
            # Fixed seed so repeated runs build the same forest.
            random_state=42,
        )),
    ]
)

print("Training Vegetation Growth Predictor...")
pipeline.fit(train_df)
print("Training complete!")

In [None]:
# Evaluate model
# Score the held-out test set and pull only the actual and predicted target
# columns into pandas for metric computation.
predictions = pipeline.predict(test_df)
predictions_pd = predictions.select(
    col("DAYS_TO_CONTACT"),
    col("PREDICTED_DAYS_TO_CONTACT")
).to_pandas()

actual_days = predictions_pd["DAYS_TO_CONTACT"]
predicted_days = predictions_pd["PREDICTED_DAYS_TO_CONTACT"]

# Cast to built-in floats so later cells receive plain Python numbers
# regardless of the scalar type the metric functions return.
mae = float(mean_absolute_error(actual_days, predicted_days))
r2 = float(r2_score(actual_days, predicted_days))

print("Model Performance:")
print(f"  MAE: {mae:.1f} days")
print(f"  R²: {r2:.3f}")

In [None]:
# Generate predictions for all vegetation records
all_predictions = pipeline.predict(veg_df)

# Timestamp-based identifier, computed once on the client.
# NOTE(review): every row written by this run gets the same PREDICTION_ID —
# confirm the target table expects a per-run rather than per-row identifier.
prediction_id = f"VGP_{datetime.now().strftime('%Y%m%d%H%M%S')}"

# Save to ML.VEGETATION_GROWTH_PREDICTION
PREDICTION_TABLE = "ML.VEGETATION_GROWTH_PREDICTION"

output_df = all_predictions.select(
    lit(prediction_id).alias("PREDICTION_ID"),
    col("ENCROACHMENT_ID"),
    col("ASSET_ID"),
    col("SPECIES"),
    col("PREDICTED_DAYS_TO_CONTACT"),
    (col("GROWTH_RATE_FT_YEAR") * col("SEASONAL_FACTOR")).alias("ADJUSTED_GROWTH_RATE"),
    # r2 may be a NumPy scalar; pass a built-in float so the literal's type
    # is always one Snowpark can map.
    lit(float(r2)).alias("MODEL_CONFIDENCE"),
    # Priority buckets by predicted days until contact: <30, <90, <180, else.
    when(col("PREDICTED_DAYS_TO_CONTACT") < 30, lit("P1_EMERGENCY"))
        .when(col("PREDICTED_DAYS_TO_CONTACT") < 90, lit("P2_URGENT"))
        .when(col("PREDICTED_DAYS_TO_CONTACT") < 180, lit("P3_STANDARD"))
        .otherwise(lit("P4_ROUTINE")).alias("RECOMMENDED_PRIORITY"),
    current_date().alias("PREDICTION_DATE"),
    lit("RANDOM_FOREST_V1").alias("MODEL_VERSION")
)

output_df.write.mode("overwrite").save_as_table(PREDICTION_TABLE)

# Count the rows in the saved table rather than output_df: counting the lazy
# DataFrame would re-run the whole prediction query a second time and would
# not reflect what was actually written.
saved_count = session.table(PREDICTION_TABLE).count()
print(f"Saved {saved_count:,} predictions to {PREDICTION_TABLE}")

In [None]:
# Species-level growth rate analysis
# Per-species summary of the raw encroachment table: record count, average
# growth rate, average days to contact and number of non-compliant records,
# ordered by average growth rate, highest first.
SPECIES_ANALYSIS_QUERY = """
    SELECT 
        SPECIES,
        COUNT(*) as ENCROACHMENT_COUNT,
        ROUND(AVG(GROWTH_RATE_FT_YEAR), 2) as AVG_GROWTH_RATE,
        ROUND(AVG(DAYS_TO_CONTACT), 0) as AVG_DAYS_TO_CONTACT,
        SUM(CASE WHEN COMPLIANCE_STATUS = 'NON_COMPLIANT' THEN 1 ELSE 0 END) as NON_COMPLIANT_COUNT
    FROM ATOMIC.VEGETATION_ENCROACHMENT
    GROUP BY SPECIES
    ORDER BY AVG_GROWTH_RATE DESC
"""
species_analysis = session.sql(SPECIES_ANALYSIS_QUERY)

report_heading = "\nSpecies Growth Rate Analysis:"
print(report_heading)
species_analysis.show()

In [None]:
# Register model
# Open the registry in RISK_PLANNING_DB.ML and log the fitted pipeline as
# version V1, attaching the evaluation metrics computed on the test set.
registry = Registry(
    session=session,
    database_name="RISK_PLANNING_DB",
    schema_name="ML",
)

evaluation_metrics = {"mae": mae, "r2": r2}
registration_comment = "Random Forest model for vegetation growth prediction with seasonal adjustment"

model_version = registry.log_model(
    model=pipeline,
    model_name="VEGETATION_GROWTH_PREDICTOR",
    version_name="V1",
    metrics=evaluation_metrics,
    comment=registration_comment,
)

print("Model registered: VEGETATION_GROWTH_PREDICTOR v1")