In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd
from geodatasets import get_path
from sklearn import metrics, preprocessing
from sklearn.utils.estimator_checks import check_estimator

from gwlearn.ensemble import GWGradientBoostingClassifier, GWRandomForestClassifier
from gwlearn.linear_model import GWLogisticRegression
from gwlearn.search import BandwidthSearch


In [2]:
gdf = gpd.read_file(get_path("geoda.ncovr"))

In [3]:
gdf.shape

(3085, 70)

In [4]:
# It is in the geographic coords in the  US and we need to work with distances. Re-project and use only points as the graph builder will require points anyway.
gdf = gdf.set_geometry(gdf.representative_point()).to_crs(5070)

In [5]:
y = gdf["FH90"] > gdf["FH90"].median()

In [6]:
gwlr = GWLogisticRegression(
    geometry=gdf.geometry,
    bandwidth=900_000,
    fixed=True,
    n_jobs=-1,
    keep_models=True,
    max_iter=500,
)
# gwlr.fit(
#     pd.DataFrame(
#         preprocessing.scale(gdf.iloc[:, 9:15]), columns=gdf.iloc[:, 9:15].columns
#     ),
#     gdf["FH90"] > gdf["FH90"].median(),
# )

In [12]:
check_estimator(gwlr, on_fail='warn')

                     geometry=0          POINT (82379.611 2869738.377)
1       POINT (-1668934.975 3018181.842)
2       POINT (-1619958.935 3006537.453)
3         POINT (-1755117.16 3050487.11)
4       POINT (-1577383.387 2998650.582)
                      ...               
3080    POINT (-1657849.621 1282663.159)
3081    POINT (-1087856.066 1384868.843)
3082      POINT (1699672.63 1751264.588)
3083      POINT (1582459.36 1898177.771)
3084     POINT (-1155816.554 2564833.36)
Name: geometry, Length: 3085, dtype: geometry,
                     keep_models=True).
Expected to fail reason: Not expected to fail
Exception: Only binary dependent variable is supported.
                     geometry=0          POINT (82379.611 2869738.377)
1       POINT (-1668934.975 3018181.842)
2       POINT (-1619958.935 3006537.453)
3         POINT (-1755117.16 3050487.11)
4       POINT (-1577383.387 2998650.582)
                      ...               
3080    POINT (-1657849.621 1282663.159)
3081    POINT

[{'estimator': GWLogisticRegression(bandwidth=900000, fixed=True,
                       geometry=0          POINT (82379.611 2869738.377)
  1       POINT (-1668934.975 3018181.842)
  2       POINT (-1619958.935 3006537.453)
  3         POINT (-1755117.16 3050487.11)
  4       POINT (-1577383.387 2998650.582)
                        ...               
  3080    POINT (-1657849.621 1282663.159)
  3081    POINT (-1087856.066 1384868.843)
  3082      POINT (1699672.63 1751264.588)
  3083      POINT (1582459.36 1898177.771)
  3084     POINT (-1155816.554 2564833.36)
  Name: geometry, Length: 3085, dtype: geometry,
                       keep_models=True),
  'check_name': 'check_estimator_cloneable',
  'exception': None,
  'status': 'passed',
  'expected_to_fail': False,
  'expected_to_fail_reason': 'Check is not expected to fail'},
 {'estimator': GWLogisticRegression(bandwidth=900000, fixed=True,
                       geometry=0          POINT (82379.611 2869738.377)
  1       POINT (-166

In [8]:
unique_values = set(np.unique(y))

In [11]:
unique_values

{np.False_, np.True_}

In [12]:
gwlr.feature_names_in_

AttributeError: 'GWLogisticRegression' object has no attribute 'feature_names_in_'

In [13]:
gwlr.global_model.feature_names_in_

array(['HR60', 'HR70', 'HR80', 'HR90', 'HC60', 'HC70'], dtype=object)

In [14]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
clf = AdaBoostClassifier(n_estimators=100, random_state=0)
clf.fit(X, y)
clf.predict([[0, 0, 0, 0]])
clf.score(X, y)

0.96

In [15]:
clf.feature_names_in_

AttributeError: 'AdaBoostClassifier' object has no attribute 'feature_names_in_'