# Land Use Prediction

In [None]:
import pandas as pd
# Load the blocks dataset from the pickle under the saint_petersburg data directory.
# Unpickling can execute arbitrary code, so only read pickle files you trust.
BLOCKS_PATH = './../../data/saint_petersburg/blocks.pickle'
blocks_gdf = pd.read_pickle(BLOCKS_PATH)

In [None]:
from blocksnet.analysis.land_use.prediction import SpatialClassifier

In [None]:
# Build the classifier through its `default()` factory.
classifier = SpatialClassifier.default()
# Run it on the loaded blocks; `result` is displayed in the next cell.
result = classifier.run(blocks_gdf)

In [25]:
# Preview only the first rows rather than rendering the whole frame.
result.head()

Unnamed: 0,geometry,category,pred_name,prob_urban,prob_non_urban,prob_industrial
0,"POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",LandUseCategory.URBAN,non_urban,0.351396,0.480452,0.168152
1,"POLYGON ((1 0, 2 0, 2 1, 1 1, 1 0))",LandUseCategory.URBAN,non_urban,0.345072,0.492949,0.161979
2,"POLYGON ((0 1, 1 1, 1 2, 0 2, 0 1))",LandUseCategory.INDUSTRIAL,non_urban,0.344711,0.485752,0.169537


# Land Use Training Mode

In [None]:
import os
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier

from blocksnet.analysis.land_use.prediction import SpatialClassifier
from blocksnet.machine_learning.strategy.sklearn.ensemble.voting.classification_strategy import SKLearnVotingClassificationStrategy

In [None]:
import pandas as pd
# Load the blocks dataset used for training from the pickle under the
# saint_petersburg data directory.
# Unpickling can execute arbitrary code, so only read pickle files you trust.
BLOCKS_PATH = './../../data/saint_petersburg/blocks.pickle'
blocks_gdf = pd.read_pickle(BLOCKS_PATH)

In [None]:
# 1. Initialisation and training
# One seed shared by every estimator so the whole ensemble is seeded consistently.
SEED = 42
BASE_PARAMS = {"random_state": SEED, "n_jobs": -1}
# Thread budget for the boosting libraries: at least 1, at most 8 cores.
CPU = max(1, min(8, os.cpu_count() or 1))
MODEL_PARAMS = {
    "rf": {
        "n_estimators": 120,          # was 200
        "max_depth": 7,
        "class_weight": "balanced",
        "max_samples": 0.25,          # bagging on a subsample
        "min_samples_leaf": 10,       # stabilisation and fewer nodes
        **BASE_PARAMS
    },
    "xgb": {
        "n_estimators": 150,          # fewer than before
        "max_depth": 7,
        "learning_rate": 0.05,
        "subsample": 0.8,             # stochasticity (row subsampling)
        "colsample_bytree": 0.8,
        "tree_method": "hist",        # memory / speed
        "random_state": SEED,         # explicit seed, since subsampling is enabled above
        "n_jobs": CPU                 # BASE_PARAMS is not spread here, so threads are set explicitly
    },
    "lgb": {
        "n_estimators": 200,
        "max_depth": 7,
        "learning_rate": 0.05,
        "class_weight": "balanced",
        "random_state": SEED,         # same seed as the other estimators
        "num_threads": CPU            # LightGBM uses a different parameter name for threads
    },
    "hgb": {
        "max_iter": 200,
        "max_depth": 7,
        "learning_rate": 0.05,
        "random_state": SEED
    }
}
# (name, estimator) pairs handed to the voting strategy.
estimators = [
    ("rf",  RandomForestClassifier(**MODEL_PARAMS["rf"])),
    ("xgb", XGBClassifier(**MODEL_PARAMS["xgb"])),
    ("lgb", LGBMClassifier(**MODEL_PARAMS["lgb"])),
    ("hgb", HistGradientBoostingClassifier(**MODEL_PARAMS["hgb"])),
]

# NOTE(review): the strategy is given n_jobs=-1 while rf/xgb/lgb also request their own
# threads; presumably this can oversubscribe the CPU — confirm how the strategy uses it.
strategy = SKLearnVotingClassificationStrategy(estimators, {"voting": "soft", "n_jobs": -1})
# NOTE(review): the meaning of the positional arguments 1000 and 5 is not visible here —
# confirm against SpatialClassifier's signature and consider passing them by keyword.
classifier = SpatialClassifier(strategy, 1000, 5)
score = classifier.train(blocks_gdf)
# Display the value returned by training as the cell output.
score