In [8]:
import pandas as pd
import geopandas as gpd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from model.estimator import GARegressor
import time
import numpy as np
import optuna
from optuna.samplers import TPESampler
from sklearn.model_selection import KFold
import pickle

In [5]:
# Load the pre-built grid from disk. Later cells use column access and
# `select_dtypes` on it, so it is presumably a (Geo)DataFrame — TODO confirm.
# NOTE(review): unpickling executes arbitrary code embedded in the file; only
# load pickles that come from a trusted source.
with open("../grid_large_std.pkl", "rb") as pkl_file:
    grid = pickle.load(pkl_file)

In [14]:
def calculate_midpoint(value):
    """Convert a range label such as ``"10-20"`` into its numeric midpoint.

    Parameters
    ----------
    value : str, int, float or None
        Either a single number (``"7.5"``, ``12``), a ``"low-high"`` range
        (whitespace around the bounds is ignored), or a missing-value marker
        (``None``, the string ``"None"``, ``pd.NA``, ...).

    Returns
    -------
    float
        ``float(value)`` for a single number, ``(low + high) / 2`` for a
        range, and ``np.nan`` for anything that cannot be interpreted
        (missing markers, empty strings, unparsable text, or labels with
        extra ``-`` separators such as ``"10-20-30"``).
    """
    # Real numbers need no parsing. Previously they fell into the string
    # branch, where `'-' in value` raised TypeError and turned them into NaN.
    # bool is excluded on purpose: it subclasses int but is not a measurement.
    if isinstance(value, (int, float, np.integer, np.floating)) and not isinstance(value, bool):
        return float(value)

    # Check the type before comparing: `pd.NA == "None"` yields pd.NA, whose
    # truth value is undefined and would raise instead of returning NaN.
    if not isinstance(value, str):
        return np.nan

    text = value.strip()
    if not text or text == "None":
        return np.nan

    # Try a plain number first so that "-5" or "1e-3" are not mistaken for
    # ranges merely because they contain a '-' character.
    try:
        return float(text)
    except ValueError:
        pass

    low, sep, high = text.partition('-')
    if not sep:
        # Not a number and no range separator: nothing to parse.
        return np.nan
    try:
        # float() tolerates surrounding whitespace, so "5 - 15" works as-is.
        return (float(low) + float(high)) / 2
    except ValueError:
        return np.nan

In [36]:
# Derive a numeric target column by passing every `PercentCoarse` entry
# through `calculate_midpoint`.
grid["Coarse_mid"] = grid["PercentCoarse"].apply(calculate_midpoint)

In [24]:
# Labels of every column in `grid` whose dtype is numeric.
numeric_cols = grid.select_dtypes(include=np.number).columns

In [30]:
# Drop the coordinate columns and the target from the numeric column labels,
# then convert the remaining labels to a NumPy array.
filtered_numeric_cols = (
    numeric_cols
    .difference(['latitude', 'longitude', 'Coarse_mid'])
    .to_numpy()
)

In [44]:
# Column groups handed to the model below.
tab_x = [*filtered_numeric_cols]    # covariate (feature) columns
tab_l = ['latitude', 'longitude']   # spatial coordinate columns
tab_y = ["Coarse_mid"]              # target column

In [45]:
# Keep only the rows for which a target value could be computed.
df = grid[grid["Coarse_mid"].notna()]

In [46]:
# Inputs are the covariates followed by the coordinates; the target is kept
# as a one-column frame because `tab_y` is a list.
X = df[tab_x + tab_l]
y = df[tab_y]

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [50]:
# Hyperparameters for the GA model.
# See the docstring of the `GeoAggregator` class for what each entry means.
params = {
    # Column roles.
    'x_cols': tab_x,
    'spa_cols': tab_l,
    'y_cols': tab_y,
    # Architecture.
    'attn_variant': 'MCPA',
    'model_variant': 'small',
    'd_model': 32,
    # Left disabled; presumably the model template supplies these — TODO confirm.
    # 'n_attn_layer': 1,
    # 'idu_points': 4,
    # 'seq_len': 128,
    'attn_dropout': 0.2,
    'attn_bias_factor': None,
    'reg_lin_dims': [16, 1],
    # Training schedule.
    'epochs': 20,
    'lr': 5e-3,
    'batch_size': 8,
    'verbose': True,   # show model summary
}

# Build the GA model from the settings above.
model = GARegressor(**params)

# Fit on the training split; covariates, spatial coordinates and the target
# are passed as separate arguments.
model.fit(
    X=X_train[tab_x],
    l=X_train[tab_l],
    y=y_train,
)

Using the model template: GA-small.

        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              2
        # inducing point                               4
        # sequence length                            128
        regressor neurons                        [16, 1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate                       0.2
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                       20
        


[INFO] Radius estimation ends after 30 iterations. Estimated radius: 0.07690
[INFO] Epoch:  1/20  |  Step:   0/1343  |  loss_step_avg: 18.0210  |  lr: 0.0002  |  abf: -0.0002
[INFO] Epoch:  1/20  |  Step: 100/1343  |  loss_step_avg: 29.2187  |  lr: 0.0006  |  abf: 0.0024


KeyboardInterrupt: 