In [1]:
import warnings

# Install a blanket "ignore" filter for every warning category. It is placed
# before the remaining imports so that warnings emitted while those libraries
# are imported are suppressed as well.
# NOTE(review): this also hides deprecation/convergence warnings from the
# modelling libraries used below -- consider narrowing it by category or module.
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import ngboost as ng
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch import optim
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import fetch_california_housing

from models.flow import build_model
from tfboost.flow import ContinuousNormalizingFlow
from tfboost.tree.engboost import EmbeddableNGBoost, EmbeddableNGBoost2, EmbeddableNGBoostDecisionPath
from tfboost.tfboost import TreeFlowBoost

In [2]:
# Load the California housing data as a (features, target) pair.
x, y = fetch_california_housing(return_X_y=True)

# Pin the shuffle seed so the train/test partition is identical on every
# Restart & Run All; without it each run evaluates on a different test set.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

# Embeddable NGBoost - N(0, 1) prior

In [3]:
# Conditional flow over a 1-D input; context_dim here is set to the same value
# (100) that is passed as embedding_size to TreeFlowBoost below.
flow = ContinuousNormalizingFlow(
    build_model(input_dim=1, hidden_dims=(80, 40), context_dim=100, conditional=True)
)

# Tree component for this section.
tree = EmbeddableNGBoost()

# Combine the tree and flow components into a single TreeFlowBoost model.
tfb = TreeFlowBoost(tree_model=tree, flow_model=flow, embedding_size=100)

In [4]:
# Fit on the training split only; x_test / y_test are used by the evaluation
# cell that follows.
# NOTE(review): the output saved with this cell stops at 3/200 epochs with a
# RuntimeError (no message captured) -- re-run and investigate before trusting
# any downstream numbers.
tfb.fit(x_train, y_train, n_epochs=200)

[iter 0] loss=1.5647 val_loss=0.0000 scale=1.0000 norm=1.1075
[iter 100] loss=1.1513 val_loss=0.0000 scale=2.0000 norm=1.5690
[iter 200] loss=0.9118 val_loss=0.0000 scale=2.0000 norm=1.4040
[iter 300] loss=0.7639 val_loss=0.0000 scale=1.0000 norm=0.6790
[iter 400] loss=0.6937 val_loss=0.0000 scale=1.0000 norm=0.6764


3.139611005783081:   2%|▏         | 3/200 [00:22<25:04,  7.64s/it] 


RuntimeError: 

In [None]:
# For each split, print its label, then the MSE of the tree component's own
# predictions, then the MSE of tfb.predict called with num_samples=50.
split_predictions = {}
for split_name, features, targets in (
    ("Train", x_train, y_train),
    ("Test", x_test, y_test),
):
    print(split_name)

    tree_pred = tfb.tree_model.predict(features)
    print(mean_squared_error(targets, tree_pred))

    flow_pred = tfb.predict(features, num_samples=50)
    print(mean_squared_error(targets, flow_pred))

    split_predictions[split_name] = (tree_pred, flow_pred)

# Keep the per-split predictions available under their notebook-level names.
y_hat_train_tree, y_hat_train_tfb = split_predictions["Train"]
y_hat_test_tree, y_hat_test_tfb = split_predictions["Test"]

# Embeddable NGBoost - N(mu, sigma) prior

In [None]:
# Fresh conditional flow for this section (same hyper-parameters as the section
# above); context_dim equals the embedding_size given to TreeFlowBoost below.
flow = ContinuousNormalizingFlow(
    build_model(input_dim=1, hidden_dims=(80, 40), context_dim=100, conditional=True)
)

# Tree component for this section.
tree = EmbeddableNGBoost2()

# Rebind tfb to a new, unfitted TreeFlowBoost built from the objects above.
tfb = TreeFlowBoost(tree_model=tree, flow_model=flow, embedding_size=100)

In [None]:
# Fit on the training split only. Fitting on the full (x, y) exposes the model
# to x_test, so the "Test" MSE printed by the next cell would not be a held-out
# score (and would not be comparable with the first section, which fits on
# x_train / y_train).
tfb.fit(x_train, y_train, n_epochs=200)

In [None]:
print("Train")
y_hat_train_tree = tfb.tree_model.predict(x_train)
print(mean_squared_error(y_train, y_hat_train_tree))

y_hat_train_tfb = tfb.predict(x_train, num_samples=50)
print(mean_squared_error(y_train, y_hat_train_tfb))

print("Test")
y_hat_test_tree = tfb.tree_model.predict(x_test)
print(mean_squared_error(y_test, y_hat_test_tree))

y_hat_test_tfb = tfb.predict(x_test, num_samples=50)
print(mean_squared_error(y_test, y_hat_test_tfb))

# Embeddable NGBoost - Decision Path

In [None]:
# Fresh conditional flow for this section (same hyper-parameters as the earlier
# sections); context_dim equals the embedding_size given to TreeFlowBoost below.
flow = ContinuousNormalizingFlow(
    build_model(input_dim=1, hidden_dims=(80, 40), context_dim=100, conditional=True)
)

# Tree component for this section.
tree = EmbeddableNGBoostDecisionPath()

# Rebind tfb to a new, unfitted TreeFlowBoost built from the objects above.
tfb = TreeFlowBoost(tree_model=tree, flow_model=flow, embedding_size=100)

In [None]:
# Fit on the training split only. Fitting on the full (x, y) exposes the model
# to x_test, so the "Test" MSE printed by the next cell would not be a held-out
# score (and would not be comparable with the first section, which fits on
# x_train / y_train).
tfb.fit(x_train, y_train, n_epochs=200)

In [None]:
print("Train")
y_hat_train_tree = tfb.tree_model.predict(x_train)
print(mean_squared_error(y_train, y_hat_train_tree))

y_hat_train_tfb = tfb.predict(x_train, num_samples=50)
print(mean_squared_error(y_train, y_hat_train_tfb))

print("Test")
y_hat_test_tree = tfb.tree_model.predict(x_test)
print(mean_squared_error(y_test, y_hat_test_tree))

y_hat_test_tfb = tfb.predict(x_test, num_samples=50)
print(mean_squared_error(y_test, y_hat_test_tfb))