In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

from model.estimator import GARegressor

# Basic Usage

## Step 1. Load the dataset

In [2]:
# Column layout of the poverty dataset: covariates, spatial coordinates, target.
tab_x = [
    'ep_unem', 'ep_pci', 'ep_nohs', 'ep_sngp', 'ep_lime', 'ep_crow', 'ep_nove',
    'rent_1', 'rntov30p_1', 'ep_unin', 'ep_minrty', 'ep_age65', 'ep_age17',
    'ep_disabl',
]
tab_l = ['latitude', 'longitude']
tab_y = ['ep_pov']

# Read the table, then separate the feature frame (covariates followed by
# coordinates) from the single-column target frame.
df = pd.read_csv(r'./data/tabular_datasets/us_sdoh_2014.csv')
X = df[tab_x + tab_l]
y = df[tab_y]

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

## Step 2. sklearn-style training

In [3]:
# Hyperparameters for the GA model.
# Check the docstring of the `GeoAggregator` class for details.
params = dict(
    x_cols=tab_x,
    spa_cols=tab_l,
    y_cols=tab_y,
    attn_variant='MCPA',
    model_variant='small',
    d_model=32,
    # Optional entries, currently not passed:
    # n_attn_layer=1,
    # idu_points=4,
    # seq_len=128,
    attn_dropout=0.2,
    attn_bias_factor=None,
    reg_lin_dims=[16, 1],
    epochs=20,
    lr=5e-3,
    batch_size=8,
    verbose=True,   # show model summary
)

# Build the estimator from the settings above.
model = GARegressor(**params)

# Fit it: covariates, spatial coordinates and the target are passed separately.
model.fit(
    X=X_train[tab_x],
    l=X_train[tab_l],
    y=y_train,
)

Using the model template: GA-mini.

        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              2
        # inducing point                               4
        # sequence length                            128
        regressor neurons                            [1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate                       0.2
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                        9
        


[INFO] Radius estimation ends after 30 iterations. Estimated radius: 0.17665
[INFO] Epoch:  1/9  |  Step:   0/6292  |  loss_step_avg: 17.5962  |  lr: 0.0002  |  abf: 0.0002
[INFO] Epoch:  1/9  |  Step: 100/6292  |  loss_step_avg: 12.7052  |  lr: 0.0003  |  abf: 0.0027
[INFO] Epoch:  1/9  |  Step: 200/6292  |  loss_step_avg: 10.0825  |  lr: 0.0006  |  abf: 0.0181
[INFO] Epoch:  1/9  |  Step: 300/6292  |  loss_step_avg: 8.4937  |  lr: 0.0010  |  abf: 0.0344
[INFO] Epoch:  1/9  |  Step: 400/6292  |  loss_step_avg: 6.9037  |  lr: 0.0015  |  abf: 0.0959
[INFO] Epoch:  1/9  |  Step: 500/6292  |  loss_step_avg: 6.0964  |  lr: 0.0022  |  abf: 0.1790
[INFO] Epoch:  1/9  |  Step: 600/6292  |  loss_step_avg: 5.4560  |  lr: 0.0028  |  abf: 0.2255
[INFO] Epoch:  1/9  |  Step: 700/6292  |  loss_step_avg: 5.3379  |  lr: 0.0035  |  abf: 0.2972
[INFO] Epoch:  1/9  |  Step: 800/6292  |  loss_step_avg: 4.9774  |  lr: 0.0041  |  abf: 0.4053
[INFO] Epoch:  1/9  |  Step: 900/6292  |  loss_step_avg: 4.9535  

## Step 3. sklearn-style testing

In [4]:
# Run the fitted GA model on the held-out split (covariates plus coordinates).
y_pred = model.predict(
    X=X_test[tab_x],
    l=X_test[tab_l],
)

# Score the predictions against the held-out target column.
r_sq = r2_score(y_true=y_test[tab_y], y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test[tab_y], y_pred=y_pred)

print(f'R-sq = {r_sq}')
print(f'MAE = {mae}')

Inferencing: 21571it [03:56, 91.19it/s]

R-sq = 0.8262247254457686
MAE = 3.6909148484398





# Hyperparameter Tuning [optional]

In [5]:
import time
import numpy as np
import optuna

from optuna.samplers import TPESampler
from sklearn.model_selection import KFold

In [6]:
def objective(trial, n_split=5, seed=42):
    """Optuna objective: mean cross-validated MAE of a GARegressor on the training split.

    One set of hyperparameters is sampled from ``trial``; a fresh model is then
    fitted on each training fold of ``X_train`` / ``y_train`` and scored on the
    matching validation fold.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.
    n_split : int, default 5
        Number of cross-validation folds.
    seed : int or None, default 42
        ``random_state`` of the shuffled ``KFold``. With a fixed value every
        trial is scored on the same partition, so differences between trials
        are not caused by a different fold assignment. Pass ``None`` to
        reshuffle on every call.

    Returns
    -------
    float
        Mean absolute error averaged over the ``n_split`` validation folds
        (lower is better).

    Notes
    -----
    ``reg_lin_dims`` is sampled as a string label ('1', '4-1', '16-1') because
    Optuna only supports None/bool/int/float/str as categorical choices.
    ``study.best_params['reg_lin_dims']`` therefore holds the label; the model
    itself receives the corresponding list.
    """
    # Label -> layer sizes handed to the regressor head.
    reg_lin_dims_choices = {
        '1': [1],
        '4-1': [4, 1],
        '16-1': [16, 1],
    }

    params = {
        'x_cols': tab_x,
        'spa_cols': tab_l,
        'y_cols': tab_y,
        'attn_variant': 'MCPA',
        'd_model': trial.suggest_categorical('d_model', [32, 64, 80]),
        'n_attn_layer': trial.suggest_int('n_attn_layer', 1, 3),
        'idu_points': trial.suggest_int('idu_points', 2, 8),
        'seq_len': trial.suggest_categorical('seq_len', [64, 81, 100, 144, 256, 400]),
        'attn_dropout': trial.suggest_float('attn_dropout', 0.01, 0.5),
        'attn_bias_factor': None,
        # Copy the list so a model can never mutate the shared lookup table.
        'reg_lin_dims': list(reg_lin_dims_choices[
            trial.suggest_categorical('reg_lin_dims', list(reg_lin_dims_choices))
        ]),
        'epochs': trial.suggest_int('epochs', 3, 30),
        'lr': 5e-3,
        'batch_size': 8,
    }

    fold_mae = np.empty(n_split)
    kf = KFold(n_splits=n_split, shuffle=True, random_state=seed)

    for idx, (trn_idx, val_idx) in enumerate(kf.split(X_train, y_train)):
        trn_X, trn_y = X_train.iloc[trn_idx], y_train.iloc[trn_idx]
        val_X, val_y = X_train.iloc[val_idx], y_train.iloc[val_idx]

        # A new model per fold, so nothing learned on one fold leaks into the next.
        model = GARegressor(**params)
        model.fit(
            X=trn_X[tab_x],
            l=trn_X[tab_l],
            y=trn_y
        )
        y_pred = model.predict(X=val_X[tab_x], l=val_X[tab_l])
        fold_mae[idx] = mean_absolute_error(y_true=val_y, y_pred=y_pred)

    return np.mean(fold_mae)

In [7]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# across kernel restarts. NOTE(review): the model training inside `objective`
# may still be stochastic — confirm whether GARegressor exposes its own seed.
sampler = TPESampler(seed=42)

# perf_counter is monotonic, so the elapsed time cannot be distorted by
# wall-clock adjustments during this long-running search.
start_time = time.perf_counter()
study = optuna.create_study(
    direction='minimize',   # the objective returns a mean absolute error
    study_name='ga-hp!',
    sampler=sampler
)
study.optimize(objective, n_trials=300)
end_time = time.perf_counter()

# Pull the winning configuration out of the finished study.
best_params = study.best_params
best_value = study.best_value
best_trial = study.best_trial

print('Elapsed time = {:.4f}s'.format(end_time - start_time))
print('Best hyperparameters: ', best_params)
print('Best results: ', best_value)
print('Best trial: ', best_trial)

[I 2025-02-03 00:42:03,014] A new study created in memory with name: ga-hp!



        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              3
        # inducing point                               7
        # sequence length                             64
        regressor neurons                            [1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate        0.24359313693372311
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                        3
        


[INFO] Radius estimation ends after 30 iterations. Estimated radius: 0.12913
[INFO] Epoch:  1/3  |  Step:   0/5033  |  loss_step_avg: 10.9589  |  lr: 0.0002  |  abf: 0.0002
[INFO] Epoch:  1/3  |  Step: 100/5033  |  loss_step_avg: 10.8516  |  lr: 0.0014  |  abf: 0.0005
[INFO] Epoch:  1/3  |  Step: 200/5033  |  loss_step_avg: 8.8506  |  lr: 0.0038  |  abf: 0.1002
[INFO] Epoch:  1/3  |  Step: 300/5033  |  loss_step_avg: 6.1895  |  lr: 0.0050  |  abf: 0.0507
[INFO] Epoch:  1/3  |  Step: 400/5033  |  loss_step_avg: 5.1357  |  lr: 0.0050  |  abf: -0.0959
[INFO] Epoch:  1/3  |  Step: 500/5033  |  loss_step_avg: 5.1231  |  lr: 0.0050  |  abf: -0.1022
[INFO] Epoch:  1/3  |  Step: 600/5033  |  loss_step_avg: 5.1873  |  lr: 0.0050  |  abf: -0.0587
[INFO] Epoch:  1/3  |  Step: 700/5033  |  loss_step_avg: 5.0825  |  lr: 0.0050  |  abf: -0.0963
[INFO] Epoch:  1/3  |  Step: 800/5033  |  loss_step_avg: 5.1190  |  lr: 0.0050  |  abf: -0.1211
[INFO] Epoch:  1/3  |  Step: 900/5033  |  loss_step_avg: 4.57


        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              3
        # inducing point                               7
        # sequence length                             64
        regressor neurons                            [1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate        0.24359313693372311
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                        3
        


[INFO] Radius estimation ends after 30 iterations. Estimated radius: 0.12795
[INFO] Epoch:  1/3  |  Step:   0/5033  |  loss_step_avg: 9.8612  |  lr: 0.0002  |  abf: 0.0002
[INFO] Epoch:  1/3  |  Step: 100/5033  |  loss_step_avg: 10.1201  |  lr: 0.0014  |  abf: -0.0023
[INFO] Epoch:  1/3  |  Step: 200/5033  |  loss_step_avg: 7.9674  |  lr: 0.0038  |  abf: 0.1521
[INFO] Epoch:  1/3  |  Step: 300/5033  |  loss_step_avg: 5.8333  |  lr: 0.0050  |  abf: 0.1577
[INFO] Epoch:  1/3  |  Step: 400/5033  |  loss_step_avg: 5.3174  |  lr: 0.0050  |  abf: 0.2185
[INFO] Epoch:  1/3  |  Step: 500/5033  |  loss_step_avg: 5.1475  |  lr: 0.0050  |  abf: 0.2360
[INFO] Epoch:  1/3  |  Step: 600/5033  |  loss_step_avg: 4.8365  |  lr: 0.0050  |  abf: 0.2739
[INFO] Epoch:  1/3  |  Step: 700/5033  |  loss_step_avg: 5.0480  |  lr: 0.0050  |  abf: 0.2892
[INFO] Epoch:  1/3  |  Step: 800/5033  |  loss_step_avg: 5.0592  |  lr: 0.0050  |  abf: 0.2996
[INFO] Epoch:  1/3  |  Step: 900/5033  |  loss_step_avg: 4.8521  |


        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              3
        # inducing point                               7
        # sequence length                             64
        regressor neurons                            [1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate        0.24359313693372311
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                        3
        


[INFO] Radius estimation ends after 25 iterations. Estimated radius: 0.12909
[INFO] Epoch:  1/3  |  Step:   0/5033  |  loss_step_avg: 26.5861  |  lr: 0.0002  |  abf: 0.0002
[INFO] Epoch:  1/3  |  Step: 100/5033  |  loss_step_avg: 15.8620  |  lr: 0.0014  |  abf: 0.0079
[INFO] Epoch:  1/3  |  Step: 200/5033  |  loss_step_avg: 9.7330  |  lr: 0.0038  |  abf: 0.1866
[INFO] Epoch:  1/3  |  Step: 300/5033  |  loss_step_avg: 6.2381  |  lr: 0.0050  |  abf: 0.2044
[INFO] Epoch:  1/3  |  Step: 400/5033  |  loss_step_avg: 5.1317  |  lr: 0.0050  |  abf: -0.0367
[INFO] Epoch:  1/3  |  Step: 500/5033  |  loss_step_avg: 5.4969  |  lr: 0.0050  |  abf: -0.0563
[INFO] Epoch:  1/3  |  Step: 600/5033  |  loss_step_avg: 4.8268  |  lr: 0.0050  |  abf: -0.0815
[INFO] Epoch:  1/3  |  Step: 700/5033  |  loss_step_avg: 5.0190  |  lr: 0.0050  |  abf: -0.1019
[INFO] Epoch:  1/3  |  Step: 800/5033  |  loss_step_avg: 4.8670  |  lr: 0.0050  |  abf: -0.0753
[INFO] Epoch:  1/3  |  Step: 900/5033  |  loss_step_avg: 4.81


        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              3
        # inducing point                               7
        # sequence length                             64
        regressor neurons                            [1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate        0.24359313693372311
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                        3
        


[INFO] Radius estimation ends after 20 iterations. Estimated radius: 0.12933
[INFO] Epoch:  1/3  |  Step:   0/5033  |  loss_step_avg: 21.0911  |  lr: 0.0002  |  abf: -0.0002
[INFO] Epoch:  1/3  |  Step: 100/5033  |  loss_step_avg: 18.9754  |  lr: 0.0014  |  abf: 0.0210
[INFO] Epoch:  1/3  |  Step: 200/5033  |  loss_step_avg: 10.0014  |  lr: 0.0038  |  abf: 0.0429
[INFO] Epoch:  1/3  |  Step: 300/5033  |  loss_step_avg: 6.5177  |  lr: 0.0050  |  abf: 0.2008
[INFO] Epoch:  1/3  |  Step: 400/5033  |  loss_step_avg: 5.4202  |  lr: 0.0050  |  abf: 0.2721
[INFO] Epoch:  1/3  |  Step: 500/5033  |  loss_step_avg: 4.6968  |  lr: 0.0050  |  abf: 0.2893
[INFO] Epoch:  1/3  |  Step: 600/5033  |  loss_step_avg: 4.7444  |  lr: 0.0050  |  abf: 0.2330
[INFO] Epoch:  1/3  |  Step: 700/5033  |  loss_step_avg: 4.7022  |  lr: 0.0050  |  abf: 0.2059
[INFO] Epoch:  1/3  |  Step: 800/5033  |  loss_step_avg: 5.0853  |  lr: 0.0050  |  abf: 0.2147
[INFO] Epoch:  1/3  |  Step: 900/5033  |  loss_step_avg: 4.7532 


        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              3
        # inducing point                               7
        # sequence length                             64
        regressor neurons                            [1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate        0.24359313693372311
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                        3
        


[INFO] Radius estimation ends after 30 iterations. Estimated radius: 0.12914
[INFO] Epoch:  1/3  |  Step:   0/5033  |  loss_step_avg: 11.5270  |  lr: 0.0002  |  abf: -0.0002
[INFO] Epoch:  1/3  |  Step: 100/5033  |  loss_step_avg: 9.3378  |  lr: 0.0014  |  abf: 0.0176
[INFO] Epoch:  1/3  |  Step: 200/5033  |  loss_step_avg: 7.3227  |  lr: 0.0038  |  abf: 0.1194
[INFO] Epoch:  1/3  |  Step: 300/5033  |  loss_step_avg: 5.9270  |  lr: 0.0050  |  abf: 0.0505
[INFO] Epoch:  1/3  |  Step: 400/5033  |  loss_step_avg: 5.1754  |  lr: 0.0050  |  abf: 0.0531
[INFO] Epoch:  1/3  |  Step: 500/5033  |  loss_step_avg: 5.4562  |  lr: 0.0050  |  abf: 0.0519
[INFO] Epoch:  1/3  |  Step: 600/5033  |  loss_step_avg: 5.0370  |  lr: 0.0050  |  abf: 0.0664
[INFO] Epoch:  1/3  |  Step: 700/5033  |  loss_step_avg: 4.6345  |  lr: 0.0050  |  abf: 0.0757
[INFO] Epoch:  1/3  |  Step: 800/5033  |  loss_step_avg: 4.9497  |  lr: 0.0050  |  abf: 0.0756
[INFO] Epoch:  1/3  |  Step: 900/5033  |  loss_step_avg: 4.6382  |


        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              1
        # inducing point                               5
        # sequence length                            144
        regressor neurons                        [16, 1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate        0.4066303632372607
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                       21
        


[INFO] Radius estimation ends after 30 iterations. Estimated radius: 0.22625
[INFO] Epoch:  1/21  |  Step:   0/5033  |  loss_step_avg: 15.3202  |  lr: 0.0002  |  abf: -0.0002
[INFO] Epoch:  1/21  |  Step: 100/5033  |  loss_step_avg: 19.7671  |  lr: 0.0002  |  abf: 0.0015
[INFO] Epoch:  1/21  |  Step: 200/5033  |  loss_step_avg: 12.2054  |  lr: 0.0003  |  abf: 0.0060
[INFO] Epoch:  1/21  |  Step: 300/5033  |  loss_step_avg: 9.1930  |  lr: 0.0004  |  abf: 0.0242
[INFO] Epoch:  1/21  |  Step: 400/5033  |  loss_step_avg: 7.8600  |  lr: 0.0006  |  abf: 0.0482
[INFO] Epoch:  1/21  |  Step: 500/5033  |  loss_step_avg: 5.8245  |  lr: 0.0008  |  abf: 0.0563
[INFO] Epoch:  1/21  |  Step: 600/5033  |  loss_step_avg: 5.4778  |  lr: 0.0011  |  abf: 0.0822
[INFO] Epoch:  1/21  |  Step: 700/5033  |  loss_step_avg: 5.0221  |  lr: 0.0014  |  abf: 0.1166
[INFO] Epoch:  1/21  |  Step: 800/5033  |  loss_step_avg: 5.1425  |  lr: 0.0017  |  abf: 0.1620
[INFO] Epoch:  1/21  |  Step: 900/5033  |  loss_step_av


        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              1
        # inducing point                               5
        # sequence length                            144
        regressor neurons                        [16, 1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate        0.4066303632372607
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                       21
        


[INFO] Radius estimation ends after 30 iterations. Estimated radius: 0.22583
[INFO] Epoch:  1/21  |  Step:   0/5033  |  loss_step_avg: 6.0670  |  lr: 0.0002  |  abf: -0.0002
[INFO] Epoch:  1/21  |  Step: 100/5033  |  loss_step_avg: 10.8187  |  lr: 0.0002  |  abf: 0.0007
[INFO] Epoch:  1/21  |  Step: 200/5033  |  loss_step_avg: 9.7087  |  lr: 0.0003  |  abf: 0.0097
[INFO] Epoch:  1/21  |  Step: 300/5033  |  loss_step_avg: 8.0842  |  lr: 0.0004  |  abf: 0.0152
[INFO] Epoch:  1/21  |  Step: 400/5033  |  loss_step_avg: 7.2776  |  lr: 0.0006  |  abf: 0.0051
[INFO] Epoch:  1/21  |  Step: 500/5033  |  loss_step_avg: 6.4611  |  lr: 0.0008  |  abf: -0.0161
[INFO] Epoch:  1/21  |  Step: 600/5033  |  loss_step_avg: 5.2395  |  lr: 0.0011  |  abf: 0.0011
[INFO] Epoch:  1/21  |  Step: 700/5033  |  loss_step_avg: 4.8194  |  lr: 0.0014  |  abf: 0.0240
[INFO] Epoch:  1/21  |  Step: 800/5033  |  loss_step_avg: 5.2457  |  lr: 0.0017  |  abf: 0.0731
[INFO] Epoch:  1/21  |  Step: 900/5033  |  loss_step_avg


        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              1
        # inducing point                               5
        # sequence length                            144
        regressor neurons                        [16, 1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate        0.4066303632372607
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                       21
        


[INFO] Radius estimation ends after 30 iterations. Estimated radius: 0.22514
[INFO] Epoch:  1/21  |  Step:   0/5033  |  loss_step_avg: 17.3102  |  lr: 0.0002  |  abf: 0.0002
[INFO] Epoch:  1/21  |  Step: 100/5033  |  loss_step_avg: 17.2409  |  lr: 0.0002  |  abf: -0.0014
[INFO] Epoch:  1/21  |  Step: 200/5033  |  loss_step_avg: 10.4332  |  lr: 0.0003  |  abf: -0.0014
[INFO] Epoch:  1/21  |  Step: 300/5033  |  loss_step_avg: 9.4848  |  lr: 0.0004  |  abf: -0.0033
[INFO] Epoch:  1/21  |  Step: 400/5033  |  loss_step_avg: 7.8059  |  lr: 0.0006  |  abf: 0.0173
[INFO] Epoch:  1/21  |  Step: 500/5033  |  loss_step_avg: 6.8262  |  lr: 0.0008  |  abf: 0.0138
[INFO] Epoch:  1/21  |  Step: 600/5033  |  loss_step_avg: 5.6719  |  lr: 0.0011  |  abf: 0.0005
[INFO] Epoch:  1/21  |  Step: 700/5033  |  loss_step_avg: 5.3403  |  lr: 0.0014  |  abf: 0.0191
[INFO] Epoch:  1/21  |  Step: 800/5033  |  loss_step_avg: 4.7020  |  lr: 0.0017  |  abf: 0.0586
[INFO] Epoch:  1/21  |  Step: 900/5033  |  loss_step_


        __________ GeoAggregator Model Summary ___________
        attention mechanism type                    MCPA
        d_model                                       32
        # attention layer                              1
        # inducing point                               5
        # sequence length                            144
        regressor neurons                        [16, 1]
        
        ________________ training details ________________
        Training on device                           cpu
        attention dropout rate        0.4066303632372607
        maximum learning rate                      0.005
        batch_size                                     8
        # epoch                                       21
        


[INFO] Radius estimation ends after 30 iterations. Estimated radius: 0.22538
[INFO] Epoch:  1/21  |  Step:   0/5033  |  loss_step_avg: 13.4813  |  lr: 0.0002  |  abf: -0.0002
[INFO] Epoch:  1/21  |  Step: 100/5033  |  loss_step_avg: 15.0411  |  lr: 0.0002  |  abf: -0.0065
[INFO] Epoch:  1/21  |  Step: 200/5033  |  loss_step_avg: 10.4446  |  lr: 0.0003  |  abf: -0.0164
[INFO] Epoch:  1/21  |  Step: 300/5033  |  loss_step_avg: 9.1346  |  lr: 0.0004  |  abf: -0.0146
[INFO] Epoch:  1/21  |  Step: 400/5033  |  loss_step_avg: 8.8750  |  lr: 0.0006  |  abf: 0.0478
[INFO] Epoch:  1/21  |  Step: 500/5033  |  loss_step_avg: 6.8839  |  lr: 0.0008  |  abf: 0.0663
[INFO] Epoch:  1/21  |  Step: 600/5033  |  loss_step_avg: 5.2212  |  lr: 0.0011  |  abf: 0.0716
[INFO] Epoch:  1/21  |  Step: 700/5033  |  loss_step_avg: 5.3541  |  lr: 0.0014  |  abf: 0.0755
[INFO] Epoch:  1/21  |  Step: 800/5033  |  loss_step_avg: 4.8801  |  lr: 0.0017  |  abf: 0.0918
[INFO] Epoch:  1/21  |  Step: 900/5033  |  loss_step