# 02 — Feature engineering & caching

This notebook:
- builds a model table (tabular + raw text)
- saves it to `data/processed/` to speed up later experiments

In [None]:
import json

import pandas as pd

from src.config import Paths
from src.data.io import load_renthop_json
from src.features.build import build_model_table

# Resolve project paths, then read the raw train/test JSON files.
paths = Paths()
raw_dir = paths.data_raw
train, test = load_renthop_json(raw_dir / "train.json", raw_dir / "test.json")

# Stack train on top of test under a fresh 0..n-1 index so that both splits
# go through build_model_table in a single pass.
# NOTE(review): if build_model_table derives any statistics from its input,
# they will be computed over train and test together — confirm this is intended.
full = pd.concat((train, test), ignore_index=True)
model_df, spec = build_model_table(full)

# Quick sanity check: overall shape plus the first three rows.
print("Full model table:", model_df.shape)
display(model_df.head(n=3))

In [None]:
# Persist the model table as Parquet in the processed-data directory.
processed_dir = paths.data_processed
processed_dir.mkdir(parents=True, exist_ok=True)  # no-op when the directory already exists
out_path = processed_dir / "model_table.parquet"
model_df.to_parquet(out_path, index=False)  # index=False: do not write the DataFrame index
print("Saved:", out_path)

In [None]:
# Save feature spec (for reproducible column lists).
# `json` is imported in the notebook's top import cell.
spec_path = paths.data_processed / "feature_spec.json"

# Create the output directory here as well, so this cell does not silently
# depend on the parquet-saving cell having been run first.
spec_path.parent.mkdir(parents=True, exist_ok=True)

# One entry per column group exposed by the spec returned from build_model_table.
spec_dict = {
    "numeric_cols": spec.numeric_cols,
    "categorical_cols": spec.categorical_cols,
    "text_cols": spec.text_cols,
    "datetime_cols": spec.datetime_cols,
}

# Pretty-printed JSON, written explicitly as UTF-8.
spec_path.write_text(json.dumps(spec_dict, indent=2), encoding="utf-8")
print("Saved:", spec_path)