In [None]:
# This notebook visualizes the decision trees inside a PvSiteModel
# whose regressor is a tree or tree ensemble that dtreeviz supports.

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Setting the working directory by hand is tedious, so it is read from the
# `CWD` environment variable (defined in the Makefile) whenever it is set.
import os

CWD = os.getenv("CWD")
if CWD:
    os.chdir(CWD)

In [None]:
from IPython.display import display, HTML

# Stretch the notebook's `.container` element to the full page width.
_full_width_style = HTML("<style>.container { width:100% !important; }</style>")
display(_full_width_style)

In [None]:
import datetime as dt
from pprint import pprint

import logging

# Raise the threshold of matplotlib's font manager logger to CRITICAL so that
# its lower-severity records are dropped.
font_manager_logger = logging.getLogger("matplotlib.font_manager")
font_manager_logger.setLevel(logging.CRITICAL)

import numpy as np
import dtreeviz

from psp.data.data_sources.pv import NetcdfPvDataSource
from psp.data.data_sources.nwp import NwpDataSource
from psp.serialization import load_model
from psp.models.recent_history import SetupConfig
from psp.training import make_data_loader
from psp.dataset import split_train_test
from psp.utils.batches import batch_features
from psp.typings import X

In [None]:
# Experiment name; the model is loaded from `exp_results/<MODEL_NAME>/model.pkl`.
MODEL_NAME = "exp-name"
# Path of the PV data file passed to `NetcdfPvDataSource`.
PV_DATA = "data/5min.netcdf"
# Location of the NWP data passed to `NwpDataSource`.
NWP_DATA = "gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_3.zarr"

In [None]:
# Build the PV and NWP data sources from the locations configured above.
pv_ds = NetcdfPvDataSource(PV_DATA)
nwp_ds = NwpDataSource(NWP_DATA)

In [None]:
# Load the trained model of the selected experiment, then give it the PV and
# NWP data sources through its setup hook.
# NOTE(review): the `.pkl` extension suggests pickle-based loading; if so, only
# load model files from trusted locations. Confirm against `load_model`.
model_path = f"exp_results/{MODEL_NAME}/model.pkl"
model = load_model(model_path)

setup_config = SetupConfig(pv_data_source=pv_ds, nwp_data_source=nwp_ds)
model.setup(setup_config)

In [None]:
# Split the PV data source into train/test splits; the data loader below draws
# its samples from `splits.test`.
splits = split_train_test(pv_ds)


def get_features(x):
    """Return the regressor-ready features for the sample `x`.

    The model's features for `x` are wrapped into a one-element batch with
    `batch_features` and then passed through the regressor's
    `_prepare_features`.
    """
    batch = batch_features([model.get_features(x)])
    return model._regressor._prepare_features(batch)


# Loader settings, named so they are easy to find and tweak.
LOADER_SEED = 1234
LOADER_STEP = 15  # NOTE(review): unit is not visible here (presumably minutes) - confirm
LOADER_LIMIT = 16  # NOTE(review): presumably caps the number of samples - confirm

# Build a loader over the test split with a fixed random seed and shuffling
# enabled, then materialise everything it yields into a list.
data_loader = make_data_loader(
    data_source=pv_ds,
    horizons=model.config.horizons,
    split=splits.test,
    get_features=get_features,
    random_state=np.random.RandomState(LOADER_SEED),
    num_workers=0,
    shuffle=True,
    step=LOADER_STEP,
    limit=LOADER_LIMIT,
)
test_set = list(data_loader)

In [None]:
def get_feature_names():
    """Return the feature names as they come out of the regressor's preparation step.

    A fixed reference sample (pv_id "2881" at 2020-01-01) is used to obtain
    the model's features together with their names; both are then passed
    through `_prepare_features` and only the resulting names are kept.
    """
    reference_sample = X(pv_id="2881", ts=dt.datetime(2020, 1, 1))
    raw_features, raw_names = model.get_features_with_names(reference_sample)
    _, prepared_names = model._regressor._prepare_features(
        batch_features([raw_features]), raw_names
    )
    return prepared_names


feature_names = get_feature_names()

In [None]:
# Assemble the arrays handed to dtreeviz below: the vertically stacked sample
# features and a flattened vector of the samples' target powers.
features = np.vstack([s.features for s in test_set])
# `np.stack` needs a real sequence: passing a generator has been deprecated
# since NumPy 1.16 and raises TypeError on recent releases, hence the list.
targets = np.stack([s.y.powers for s in test_set]).reshape(-1)
# Uncomment to inspect the assembled data:
# print(features.shape)
# print(targets.shape)
# pprint(feature_names)

In [None]:
# Index of the tree to visualize and of the test sample to trace through it.
TREE_IDX = 0
SAMPLE_IDX = 0

x = features[SAMPLE_IDX]
y = targets[SAMPLE_IDX]

# Print the selected sample's target, then its (feature name, value) pairs.
print(y)
named_values = list(zip(feature_names, x))
pprint(named_values)

# Wrap the model's underlying regressor for dtreeviz, using the stacked test
# features and targets as the reference data.
viz_model = dtreeviz.model(
    model._regressor._regressor,
    tree_index=TREE_IDX,
    X_train=features,
    y_train=targets,
    feature_names=feature_names,
    target_name="y",
)

# Render the tree for the selected sample.
# Optional extra argument to restrict the displayed levels:
#     depth_range_to_display=(0, 2)
tree_view = viz_model.view(scale=1.5, orientation="LR", fancy=False, x=x)
display(tree_view)

prediction_path = viz_model.explain_prediction_path(x=x)
print(prediction_path)