# 0. Settings

In [None]:
# User-editable settings for this notebook run.

# Locality identifier handed to set_locality() further down.
locality = "nc-guilford"
# Forwarded as the "verbose" argument of the checkpointed enrichment steps.
verbose = True
# When True, delete_checkpoints() is called before any data is loaded.
clear_checkpoints = False

# 1. Basic setup

In [None]:
# Notebook bootstrap, executed before the library imports in the next cell.
# NOTE(review): presumably prepares the notebook environment (paths, working
# directory) so that later imports resolve — confirm in init_notebooks.
from init_notebooks import setup_environment
setup_environment()

In [None]:
# Imports for the rest of the notebook: os, pandas, and openavmkit helpers

import os
import pandas as pd
from openavmkit.data import (
    load_data,
    enrich_time
)    
from openavmkit.pipeline import (
    NotebookState, 
    set_locality,
    examine_df
)
from openavmkit.utilities.settings import (
    load_settings
)
from openavmkit.cleaning import (
    clean_valid_sales,
    fill_unknown_values
)
from openavmkit.checkpoint import (
    from_checkpoint,
    write_checkpoint,
    delete_checkpoints
)
from openavmkit.time_adjustment import (
    enrich_time_adjustment
)
from openavmkit.horizontal_equity_study import (
    mark_horizontal_equity_clusters_per_model_group
)
from openavmkit.sales_scrutiny_study import (
    run_sales_scrutiny_per_model_group
)

In [None]:
# First execution of this cell only: create the empty notebook-state slot and
# the guard flag. Re-running the cell skips this, so the existing `nbs` is
# handed back to set_locality() instead of being reset to None.
if "inited" not in globals():
    nbs: NotebookState = None
    inited = True

# Bind the notebook state to the chosen locality, then load the settings.
nbs = set_locality(nbs, locality)
settings = load_settings()

In [None]:
# Optionally discard cached intermediate results so the steps below are
# recomputed. Use this notebook's own "2-clean" prefix, which is shared by
# every checkpoint name read or written further down.
# NOTE(review): assumes delete_checkpoints matches checkpoints by name prefix
# — confirm in openavmkit.checkpoint.
if clear_checkpoints:
    delete_checkpoints("2-clean")

## 1.1. Load data

In [None]:
# Build the working dataframe `df` from the loaded settings.
# NOTE(review): which files load_data reads is decided inside openavmkit.data
# — confirm there.
df = load_data(settings)

## 1.2. Fill unknowns in data

In [None]:
# Replace `df` with the result of fill_unknown_values().
# NOTE(review): presumably fills missing values as configured in `settings`
# — confirm in openavmkit.cleaning.
df = fill_unknown_values(df, settings)

In [None]:
# Inspection step: `df` is not reassigned here.
# NOTE(review): presumably prints a summary of the dataframe — confirm in
# openavmkit.pipeline.
examine_df(df, settings)

# 2. Enrichment

In [None]:
# Horizontal-equity clustering, routed through from_checkpoint() under the
# name "2-clean-00-horizontal-equity".
# NOTE(review): presumably a stored checkpoint is reused when present instead
# of re-running the function — confirm in openavmkit.checkpoint.
df = from_checkpoint(
    "2-clean-00-horizontal-equity",
    mark_horizontal_equity_clusters_per_model_group,
    {"df_in": df, "settings": settings, "verbose": verbose},
)

In [None]:
# Replace `df` with the result of clean_valid_sales().
# NOTE(review): presumably filters or normalizes sales records per `settings`
# — confirm in openavmkit.cleaning.
df = clean_valid_sales(df, settings)

In [None]:
# Replace `df` with the result of enrich_time(); runs before the checkpointed
# time-adjustment step in the next cell.
# NOTE(review): presumably adds derived date/time fields — confirm in
# openavmkit.data.
df = enrich_time(df)

In [None]:
# Time-adjustment enrichment, routed through from_checkpoint() under the name
# "2-clean-01-enrich-time-adjustment".
# NOTE(review): presumably a stored checkpoint is reused when present instead
# of re-running the function — confirm in openavmkit.checkpoint.
df = from_checkpoint(
    "2-clean-01-enrich-time-adjustment",
    enrich_time_adjustment,
    {"df_in": df, "settings": settings, "verbose": verbose},
)

In [None]:
# Sales-scrutiny study, routed through from_checkpoint() under the name
# "2-clean-02-sales-scrutiny".
# NOTE(review): presumably a stored checkpoint is reused when present instead
# of re-running the function — confirm in openavmkit.checkpoint.
df = from_checkpoint(
    "2-clean-02-sales-scrutiny",
    run_sales_scrutiny_per_model_group,
    {"df_in": df, "settings": settings, "verbose": verbose},
)

In [None]:
# Store the final dataframe of this notebook under the checkpoint name
# "2-clean-03-out".
# NOTE(review): presumably this is the hand-off read by the next pipeline
# stage — confirm against the downstream notebook.
write_checkpoint(df, "2-clean-03-out")