In [1]:
from data_prep.data_prep import prepare_all_data
from training.training import prepare_model_data, grid_search
from training.feature_selection import forward_feature_selection, correlation_feature_selection,projection_predictive_selection
from model import burglary_model
from utils.utils import single_out_last, setup_reproducibility
from testing.testing import StatisticalTester
from pipeline import train_and_evaluate_model
# Read the merged parquet file and unpack the three objects the rest of the
# notebook relies on:
#   * model_tuple         - element 0 is split into train/test further down;
#                           elements 1-6 are the per-group feature lists.
#   * occupation_mappings - indexed with [1] when the model is trained.
#   * ward_idx_map        - forwarded to prepare_model_data / grid_search.
# NOTE(review): "lsoa" is presumably the spatial granularity - confirm against
# prepare_all_data.
model_tuple, occupation_mappings, ward_idx_map = prepare_all_data(
    "../merged_data.parquet",
    "lsoa",
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed passed to the reproducibility helper; kept as a named constant so it is
# easy to find and change.
SEED = 42

# setup_reproducibility returns the device that later cells hand to
# prepare_model_data and grid_search (presumably it also seeds the RNGs -
# confirm in utils.utils).
device = setup_reproducibility(SEED)
print(f"Using device: {device}")

# Split the first element of model_tuple into the training and testing
# portions (presumably the last period is held out for testing - TODO confirm).
training_data, testing_data = single_out_last(model_tuple[0])

Using device: cuda


In [3]:
# # Automated feature selection block
# # Correlation feature selection: computationally light
# # Forward feature selection: computationally heavy, more accurate, but needs a GPU
# inner_train,inner_val = single_out_last(training_data)
# candidate_features = {
#     "static": model_tuple[1],
#     'dynamic': model_tuple[2],
#     'seasonal': model_tuple[3],
#     'time_trend': model_tuple[4],
#     'temporal': model_tuple[5],
#     'spatial': model_tuple[6],
#     }
# #selected_feats = correlation_feature_selection(inner_train,candidate_features, max_features=12,print_progress=True)
# selected_feats=projection_predictive_selection(
#     burglary_model,
#     inner_train,
#     inner_val,
#     candidate_features,
#     device,
#     num_steps=200,
#     lr=1e-3,
#     guide_type='diag',
#     verbose=False,
#     max_features=10,
#     print_progress=True
# )
# print(f"Selected features: {selected_feats}")
# #Forward Feature Selection:
# #selected_feats = forward_feature_selection(burglary_model,inner_train,inner_val,candidate_features,device,num_steps=200,lr=1e-3,guide_type='diag',verbose=False, max_features=10, print_progress=True)
# model_tuple = (
#     model_tuple[0],
#     selected_feats['static'],
#     selected_feats['dynamic'],
#     selected_feats['seasonal'],
#     selected_feats['time_trend'],
#     selected_feats['temporal'],
#     selected_feats['spatial'],
#     )

In [4]:
# Everything in model_tuple after element 0 is forwarded positionally to
# prepare_model_data for both splits.
feature_groups = model_tuple[1:]

# Prepare the training split first: its "means" and "stds" entries are passed
# into the test-split call below (presumably so both splits are standardised
# with training statistics - TODO confirm in training.training).
train_data = prepare_model_data(
    training_data,
    *feature_groups,
    device,
    ward_idx_map=ward_idx_map,
)
test_data = prepare_model_data(
    testing_data,
    *feature_groups,
    device,
    train_data["means"],
    train_data["stds"],
    ward_idx_map,
)

# train_and_evaluate_model returns four values. The third one is stored on the
# svi object as its `guide` attribute, which is what the StatisticalTester cell
# reads later on.
evaluation_metrics, svi, guide, prediction_tester = train_and_evaluate_model(
    train_data,
    test_data,
    burglary_model,
    occupation_mappings[1],
)
svi.guide = guide

Training SVI: 100%|██████████| 500/500 [00:25<00:00, 19.81it/s]


In [7]:
# Display the metrics returned by train_and_evaluate_model; the saved output
# shows the keys 'rmse', 'mae' and 'crps'.
evaluation_metrics

{'rmse': np.float32(2.4565933),
 'mae': np.float32(1.4824739),
 'crps': 2.213439655641901}

In [5]:
# Names of the factor groups, listed in the same order as the feature lists
# stored at positions 1-6 of model_tuple.
# NOTE(review): the "b_*" names presumably match coefficient names inside
# burglary_model - confirm against the model definition.
factor_names = (
    "b_static",
    "b_dynamic",
    "b_seasonal",
    "b_time_tr",
    "b_temporal",
    "b_spatial",
)

# Pair each name with the feature list at the matching position.
factors_map = {
    name: model_tuple[position]
    for position, name in enumerate(factor_names, start=1)
}

# The tester receives the prepared test split, the model, the guide stored on
# svi, and the name -> features mapping built above.
statistical_tester = StatisticalTester(test_data, burglary_model, svi.guide, factors_map)

In [6]:
# Run the tester's prediction step; 5_000 is presumably the number of samples
# to draw - TODO confirm against StatisticalTester.predict.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, i.e. the
# run was stopped before finishing, and the cells after it have no recorded
# execution.
statistical_tester.predict(5_000)

KeyboardInterrupt: 

In [None]:
# Collect the tester's summaries for all factors and print them as plain text.
factor_summaries = statistical_tester.evaluate_all()
print(factor_summaries)

In [None]:

import pandas as pd

# Hyperparameter search. The inner train/validation split is carved out of
# training_data only, so testing_data is not touched while tuning.
inner_train, inner_val = single_out_last(training_data)

# Search space: three learning rates crossed with two guide types.
param_grid = {
    "lr": [1e-2, 5e-3, 1e-3],
    "guide_type": ["diag", "lowrank"],
}

# Feature groups (model_tuple[1:]) and device are forwarded positionally, in
# the same order used for prepare_model_data earlier in the notebook.
results_df = grid_search(
    burglary_model,
    inner_train,
    inner_val,
    *model_tuple[1:],
    device,
    param_grid,
    ward_idx_map=ward_idx_map,
    num_steps=500,
)
print(results_df)

# Take the top row by position rather than by label: `.loc[0]` looks up the
# index label 0, which is only the first row if grid_search resets the index
# after ordering its results. `.iloc[0]` is always the first row.
# NOTE(review): this assumes grid_search returns a DataFrame whose rows are
# ordered best-first - confirm in training.training.
print("Best parameters:", results_df.iloc[0].to_dict())