In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Model

This notebook loads your assembled and cleaned data and runs modeling and analysis on it.

- First, we run predictive models
- Then, we analyze your predictions as well as existing ones (such as the assessor's)
- Finally, we generate automated statistical reports assessing their quality

In [None]:
# Notebook configuration -- change these as desired

# The slug of the locality you are currently working on (passed to `init_notebook`)
locality = "us-md-baltimorecity"

# Whether to print out a lot of stuff (can help with debugging) or stay mostly quiet
verbose = True

# Clear previous state for this notebook and start fresh
# (when True, a later cell calls `delete_checkpoints("3-model")`)
clear_checkpoints = True

# Clear cached results for model predictions and start fresh
# NOTE(review): no cell visible in this notebook reads this flag -- confirm where it is consumed
clear_model_results = True

# 1. Basic setup

In [None]:
# Import the local `init_notebooks` helper module and run its environment setup
# before anything is imported from openavmkit.
# NOTE(review): presumably this prepares paths/dependencies for the notebooks -- confirm in init_notebooks
import init_notebooks
init_notebooks.setup_environment()

In [None]:
# import a bunch of stuff
from openavmkit.pipeline import (
    init_notebook,
    load_settings,
    read_pickle,
    examine_sup,
    write_canonical_splits,
    try_variables,    
    try_models,
    finalize_models,
    run_and_write_ratio_study_breakdowns,
    from_checkpoint,
    delete_checkpoints
)

In [None]:
# Initialize the notebook session for the locality chosen in the configuration cell.
# NOTE(review): presumably this selects the locality's working directory -- confirm in openavmkit.pipeline
init_notebook(locality)

In [None]:
# When requested in the configuration cell, delete this notebook's saved checkpoints.
# NOTE(review): "3-model" is presumably matched as a checkpoint-name prefix, so every
# checkpoint this notebook writes should start with it -- confirm in openavmkit.pipeline
if clear_checkpoints:
    delete_checkpoints("3-model")

In [None]:
# Load the settings once; this copy is the one passed to `examine_sup` further down.
# Most other cells call `load_settings()` again at call time instead of reusing it
# (presumably so edits to the settings are picked up without re-running this cell -- confirm).
settings = load_settings()

# 2. Read & Split

We load the cleaned data from the last checkpoint:

In [None]:
# Load the cleaned data written by the previous stage at "out/2-clean-sup".
# The loaded object is passed as `sup` to the modeling functions in the cells below.
sales_univ_pair = read_pickle("out/2-clean-sup")

We separate our test set from our training set.  
This guarantees we have one durable source of truth for the test/train split.

In [None]:
# Write the canonical test/train splits for the loaded data, using freshly loaded settings
write_canonical_splits(sales_univ_pair, load_settings())

In [None]:
# Examine the loaded data, using the `settings` object loaded near the top of the notebook
# NOTE(review): presumably this prints a summary for manual inspection -- confirm
examine_sup(sales_univ_pair, settings)

# 3. Experiment

- Examine which variables are likely to be significant
- Experiment with different model types and variables until you get good results.
- Don't proceed past this point until you have good results.

In [None]:
# Examine which variables are likely to be significant.
# Verbosity follows the notebook-level `verbose` flag from the configuration cell
# (it was previously hard-coded to True, which silently ignored that flag).
# `plot=True` requests plots from the helper.
try_variables(
    sup=sales_univ_pair,
    settings=load_settings(),
    verbose=verbose,
    plot=True
)

This runs all of our modeling code:  
- Run individual models
- Run preliminary ratio studies & horizontal equity studies
- Generate overall utility scores per model
- Optionally generate ensembles of the best individual models
- Do this three times:
  - main : vacant+improved dataset, predicting full market value
  - hedonic : vacant+improved dataset, predicting land value
  - vacant : vacant dataset, predicting land value

In [None]:
# Run the modeling pass: main, hedonic, and vacant models are enabled; ensembles are disabled.
try_models(
    # Inputs: the loaded data, freshly loaded settings, and the notebook-level verbosity flag
    sup=sales_univ_pair, settings=load_settings(), verbose=verbose,
    # Which model groups to run
    run_main=True, run_hedonic=True, run_vacant=True,
    run_ensemble=False,
    # NOTE(review): presumably reuses previously saved model parameters -- confirm
    use_saved_params=True,
)

# 🛑 STOP! 🛑

## Do you have good results yet?

- ✅ YES:
  - Nevermind, please proceed.
- ❌ NO:
  - Change your settings and try:
    - Different models
    - Different variables
  - Go back to notebooks 01 and 02 and:
    - Get more variables
    - Get better variables
    - Get cleaner data
    - Look At It On A Map

# 4. Finalize model

Once you have good results, this will finalize them and prepare the rest of the pipeline for final processing.

In [None]:
# Finalize the models through the checkpoint helper and keep the returned object in `results`.
# The checkpoint name carries the "3-model" prefix used by this notebook's
# `delete_checkpoints("3-model")` call. It was previously "4-model-00-finalize-models",
# which that cleanup would not match, so `clear_checkpoints = True` could leave a stale
# finalize checkpoint in place.
# NOTE(review): assumes delete_checkpoints matches by name prefix -- confirm in openavmkit.pipeline
results = from_checkpoint(
    "3-model-00-finalize-models",
    finalize_models,
    {
        "sup": sales_univ_pair,
        "settings": load_settings(),
        "save_results": False,
        "verbose": verbose,
    },
)

# 5. Generate reports

In [None]:
# Run the ratio study reports.
# Settings are re-loaded here rather than reusing the `settings` variable loaded earlier.
run_and_write_ratio_study_breakdowns(load_settings())

# 6. Make it map ready

In [None]:
# Display the object returned by the finalize step as this cell's output
results