In [1]:
from data_prep.data_prep import prepare_all_data
from training.training import train_model, prepare_model_data, create_learner
from model import burglary_model
from utils.utils import single_out_last, setup_reproducibility
from testing.testing import PredictionTester, StatisticalTester

# Build every model input from the merged parquet file in the parent directory.
# The call returns a pair: `model_tuple` (an indexable collection; element 0 is split
# into train/test below and elements 1.. are forwarded to prepare_model_data) and
# `occupation_mappings` (element 1 is later handed to PredictionTester).
# NOTE(review): "lsoa" presumably selects the geographic unit — confirm in prepare_all_data.
model_tuple, occupation_mappings = prepare_all_data("../merged_data.parquet", "lsoa")

In [2]:
# Seed handed to setup_reproducibility; named so a reader can change it in one place.
SEED = 42

# setup_reproducibility returns the device that is later passed to prepare_model_data.
# It must run before the learner is created so that step starts from a seeded state.
device = setup_reproducibility(SEED)

# Split the first element of model_tuple into a training part and a testing part.
# NOTE(review): judging by the helper name the testing part is the last period — confirm.
training_data, testing_data = single_out_last(model_tuple[0])

# Learner for the burglary model; `svi.guide` is reused by both tester classes below.
svi = create_learner(burglary_model)

In [3]:
# Number of SVI optimisation steps; named so the training budget is explicit and tunable.
NUM_SVI_STEPS = 500

# Assemble the training inputs from the training split, the remaining model_tuple
# entries (everything after element 0) and the device.
train_data = prepare_model_data(training_data, *model_tuple[1:], device)

# Run the optimisation. The last two entries of `training_results` are reused when the
# test inputs are prepared, so keep the whole return value around.
training_results = train_model(train_data, svi, num_steps=NUM_SVI_STEPS)

Training SVI: 100%|██████████| 500/500 [00:15<00:00, 33.22it/s]


In [4]:
# Prepare the held-out inputs with the same helper as the training inputs, additionally
# passing the last two entries of `training_results`.
# NOTE(review): these look like statistics fitted on the training data that have to be
# reused for the test split — confirm against train_model's return value.
test_data = prepare_model_data(testing_data, *model_tuple[1:], device, training_results[-2], training_results[-1])
# The tester receives the test inputs, the model, the fitted guide and occupation_mappings[1].
# NOTE(review): presumably that mapping restores the area codes (E01000001, ...) used as the
# index of the prediction tables below — confirm in PredictionTester.
prediction_tester = PredictionTester(test_data, burglary_model, svi.guide, occupation_mappings[1])

In [5]:
# Generate predictions for the test inputs. The table shown by get_all_predictions()
# has columns 0..4999, so the argument is the number of draws per area.
prediction_tester.predict(5_000)

In [6]:
# Display the raw draws: one row per area code, one integer-count column per draw.
prediction_tester.get_all_predictions()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999
E01000001,0,1,1,1,0,0,0,0,0,0,...,0,0,1,0,0,0,2,0,0,0
E01000002,0,1,0,0,0,0,1,1,0,0,...,0,1,0,0,0,0,0,1,1,1
E01000003,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
E01000005,0,1,0,0,0,3,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
E01000006,0,1,2,1,0,0,1,2,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
E01035688,1,0,0,0,0,0,1,0,0,0,...,0,1,0,0,0,0,1,1,0,0
E01035689,1,0,0,0,0,0,0,0,1,1,...,1,0,0,1,0,0,1,0,0,1
E01035690,5,0,2,0,1,2,1,1,0,0,...,1,3,0,0,1,2,0,1,0,1
E01035691,0,1,2,0,2,0,0,0,0,0,...,1,1,1,0,1,0,1,2,0,0


In [7]:
# Display a lower_bound / upper_bound pair per area code.
# NOTE(review): alpha=0.05 presumably yields a 95% interval over the draws — confirm
# how get_confidence_intervals turns alpha into quantiles.
prediction_tester.get_confidence_intervals(alpha=0.05)

Unnamed: 0,lower_bound,upper_bound
E01000001,0.0,1.0
E01000002,0.0,1.0
E01000003,0.0,2.0
E01000005,0.0,1.0
E01000006,0.0,2.0
...,...,...
E01035688,0.0,2.0
E01035689,0.0,2.0
E01035690,0.0,5.0
E01035691,0.0,4.0


In [8]:
# Display the per-area mean of the predictions (single `mean` column).
prediction_tester.get_mean_predictions()

Unnamed: 0,mean
E01000001,0.1654
E01000002,0.1624
E01000003,0.2548
E01000005,0.1710
E01000006,0.3434
...,...
E01035688,0.3400
E01035689,0.2386
E01035690,1.4544
E01035691,0.9742


In [9]:
# Display the per-area median of the predictions (single `median` column).
# NOTE(review): the string argument looks like an output path, which would make this cell
# write "sample_predictions.parquet" into the working directory — confirm. The other
# getters in this notebook are called without such an argument.
prediction_tester.get_median_predictions("sample_predictions.parquet")

Unnamed: 0,median
E01000001,0.0
E01000002,0.0
E01000003,0.0
E01000005,0.0
E01000006,0.0
...,...
E01035688,0.0
E01035689,0.0
E01035690,1.0
E01035691,1.0


In [10]:
# Keys under which evaluate_all() reports its tables, listed in the order of the
# model_tuple entries they belong to: the first name pairs with model_tuple[1],
# the second with model_tuple[2], and so on.
factor_sites = (
    "b_static",
    "b_dynamic",
    "b_seasonal",
    "b_time_tr",
    "b_temporal",
    "b_spatial",
)

# Index lookups (rather than a slice) keep the IndexError if model_tuple is too short.
factors_map = {
    site: model_tuple[position]
    for position, site in enumerate(factor_sites, start=1)
}

# Same test inputs, model and guide as the prediction tester, plus the factor mapping.
statistical_tester = StatisticalTester(test_data, burglary_model, svi.guide, factors_map)

In [12]:
# Generate predictions for the statistical tests, using the same argument (5_000) as the
# prediction tester above.
# NOTE(review): the execution counter jumps from 10 to 12 here, so a cell was run and then
# removed or skipped — verify the notebook still reproduces under Restart & Run All.
statistical_tester.predict(5_000)

In [13]:
# Display the evaluation results: a dict keyed by factor name ('b_static', ...), each value
# a table with one row per input column and at least `col`, `mean` and `ci_lower` columns.
statistical_tester.evaluate_all()

{'b_static':                                                   col      mean  ci_lower  \
 0   Car or van availability|3 cars or vans in hous...  0.362417  0.263632   
 1          Car or van availability|Cars per household  0.440335  0.327118   
 2                Ethnic Group|Asian/Asian British (%)  0.239687  0.149922   
 3                               Ethnic Group|BAME (%) -0.604784 -0.709642   
 4   Ethnic Group|Black/African/Caribbean/Black Bri...  0.269466  0.157570   
 5                              Ethnic Group|White (%) -0.163052 -0.256165   
 6   Public Transport Accessibility Levels|Number o...  0.336769  0.252678   
 7   Public Transport Accessibility Levels|Number o...  0.557695  0.472529   
 8   Public Transport Accessibility Levels|% 0-1 (p...  0.695438  0.603499   
 9   Public Transport Accessibility Levels|% 2-3 (a...  0.754467  0.681606   
 10  Public Transport Accessibility Levels|% 4-6 (g...  0.259292  0.166852   
 11                          Households|All househol