In [1]:
import os
import joblib
import pandas as pd

In [None]:
# Directory holding the saved model artifacts, resolved against the current
# working directory (so it depends on where the kernel was started).
models_path = os.getcwd() + '/../outputs/models'

In [None]:
# ================================================================
# 1. LOAD MODEL, THRESHOLD, METADATA
# ================================================================
def load_model(model_path, threshold_path, metadata_path):
    """
    Deserialize the three saved artifacts with ``joblib.load``.

    Parameters
    ----------
    model_path : path of the serialized trained pipeline.
    threshold_path : path of the serialized decision threshold.
    metadata_path : path of the serialized metadata object.

    Returns
    -------
    tuple
        ``(pipeline, threshold, metadata)``, in that order.
    """
    # The files are read in argument order; any error raised by joblib.load
    # propagates to the caller unchanged.
    artifact_paths = (model_path, threshold_path, metadata_path)
    pipeline, threshold, metadata = [joblib.load(path) for path in artifact_paths]
    return pipeline, threshold, metadata


# ================================================================
# 2. PREDICT USING PIPELINE + THRESHOLD
# ================================================================
def predict_with_model(pipeline, threshold, df):
    """
    Score ``df`` with the pipeline and binarize the scores at ``threshold``.

    The score of each row is column 1 of ``pipeline.predict_proba(df)``.
    A row is labelled 1 when its score is greater than or equal to
    ``threshold`` and 0 otherwise.

    Returns
    -------
    tuple
        ``(preds, probs)``: the integer labels and the scores they were
        derived from.
    """
    class_scores = pipeline.predict_proba(df)
    probs = class_scores[:, 1]
    # Boolean mask first, then cast so callers receive 0/1 integers.
    meets_threshold = probs >= threshold
    return meets_threshold.astype(int), probs


# ================================================================
# 3. PRETTY OUTPUT WRAPPER
# ================================================================
def predict_pretty(pipeline, threshold, df, k=5):
    """
    Run ``predict_with_model`` and present the result as a two-column frame.

    The first ``k`` rows are printed to stdout. The full frame, with columns
    ``prediction`` and ``probability``, is returned.
    """
    labels, scores = predict_with_model(pipeline, threshold, df)
    summary = pd.DataFrame({"prediction": labels, "probability": scores})
    preview = summary.head(k)
    print(preview)
    return summary

In [None]:
# One-row input frame in which every 0/1 flag is 0 and the size fields are small.
# NOTE(review): the column set is presumably the feature schema the saved
# pipeline expects — confirm against the training code.
example1 = pd.DataFrame(
    data=[
        {
            # 0/1 flag columns
            "site_green_belt": 0,
            "agricultural_holding": 0,
            "development_affect_setting_of_listed_building": 0,
            "historic_building_grant_made": 0,
            "in_ca_relates_to_ca": 0,
            "is_flooding_an_issue": 0,
            "is_the_site_within_an_aonb": 0,
            "is_site_within_an_sssi": 0,
            # Numeric size columns
            "area_of_site_in_hectares": 0.12,
            "floor_space_in_square_metres": 85,
            "number_of_residences": 1,
            # String-valued descriptor columns
            "procedure": "Written",
            "development_type": "Householder",
            "reason_for_the_appeal": "Refusal of planning permission",
            "type_detail": "Extension",
            "type_of_casework": "Planning Appeal",
            # Location columns
            "lpa_name": "Camden",
            "postcode_district": "NW3",
            # Free-text column
            "appeal_type_reason": "Appeal against refusal of a small rear extension. No heritage or flood issues.",
        }
    ]
)

In [None]:
# One-row input frame in which most 0/1 flags are 1 and the size fields are large.
# NOTE(review): the column set is presumably the feature schema the saved
# pipeline expects — confirm against the training code.
example2 = pd.DataFrame(
    data=[
        {
            # 0/1 flag columns (only historic_building_grant_made is 0)
            "site_green_belt": 1,
            "agricultural_holding": 1,
            "development_affect_setting_of_listed_building": 1,
            "historic_building_grant_made": 0,
            "in_ca_relates_to_ca": 1,
            "is_flooding_an_issue": 1,
            "is_the_site_within_an_aonb": 1,
            "is_site_within_an_sssi": 1,
            # Numeric size columns
            "area_of_site_in_hectares": 4.5,
            "floor_space_in_square_metres": 1200,
            "number_of_residences": 12,
            # String-valued descriptor columns
            "procedure": "Hearing",
            "development_type": "Major Development",
            "reason_for_the_appeal": "Enforcement notice",
            "type_detail": "Change of use",
            "type_of_casework": "Enforcement",
            # Location columns
            "lpa_name": "Bromley",
            "postcode_district": "BR6",
            # Free-text column
            "appeal_type_reason": "Major development in green belt with heritage concerns and flood risk. Non-compliant use.",
        }
    ]
)

In [None]:
# Load the persisted LightGBM artifacts for the appeals model.
pipeline, threshold, meta = load_model(
    model_path=f"{models_path}/appeals/LightGBM.joblib",
    threshold_path=f"{models_path}/appeals/LightGBM_threshold.pkl",
    metadata_path=f"{models_path}/appeals/LightGBM_metadata.pkl"
)

print("Loaded model:", meta["model_name"])
print("Threshold:", threshold)

# Score both hand-written examples through one code path.
# predict_pretty already prints its own preview. Calling it inside the loop
# (instead of as the bare last expression of the cell) stops Jupyter from
# additionally rendering the returned DataFrame for the final example only.
for label, example in (("Example 1", example1), ("Example 2", example2)):
    print(f"\n--- {label} ---")
    predict_pretty(pipeline, threshold, example)