In [1]:
# ruff: noqa: E402
import math
import warnings
from typing import Dict, Literal

warnings.simplefilter("ignore")
import delu  # Deep Learning Utilities: https://github.com/Yura52/delu
import numpy as np
import scipy.special
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
import torch
import torch.nn.functional as F
import torch.optim
from torch import Tensor
from tqdm.std import tqdm
import json
import sys

warnings.resetwarnings()

from rtdl_revisiting_models import MLP, ResNet, FTTransformer

sys.path.append('..')

from interpretDistill.fourierDistill import *
from interpretDistill.binaryTransformer import *

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Set random seeds in all libraries.
delu.random.seed(0)

0

In [3]:
# >>> Dataset.
TaskType = Literal["regression", "binclass", "multiclass"]

task_type: TaskType = "regression"
n_classes = None
dataset = sklearn.datasets.fetch_california_housing(as_frame = True)
X: np.ndarray = dataset["data"]
Y: np.ndarray = dataset["target"]

all_idx = np.arange(len(Y))
trainval_idx, test_idx = sklearn.model_selection.train_test_split(
    all_idx, train_size=0.8, random_state = 0
)
train_idx, val_idx = sklearn.model_selection.train_test_split(
    trainval_idx, train_size=0.8, random_state = 0
)

X_b = {}
bt = BinaryTransformer(depth = 3, bit = False)
X_b['train'] = bt.fit_and_transform(X.loc[train_idx, :], Y.loc[train_idx])
X_b['val'] = bt.transform(X.loc[val_idx, :])
X_b['test'] = bt.transform(X.loc[test_idx, :])


# >>> Continuous features.
#X_cont: np.ndarray = X[['carat', 'depth', 'table', 'x', 'y', 'z']].to_numpy().astype(np.float32)
n_cont_features = 0

# >>> Categorical features.
# NOTE: the above datasets do not have categorical features, but,
# for the demonstration purposes, it is possible to generate them.

cat_cardinalities = [len(X_b['train'][c].value_counts()) for c in X_b['train'].columns]
print(cat_cardinalities)


for k in X_b.keys():
    X_b[k] = (X_b[k] + 1) // 2

# >>> Labels.
# Regression labels must be represented by float32.

Y = Y.to_numpy()
if task_type == "regression":
    Y = Y.astype(np.float32)
else:
    assert n_classes is not None
    Y = Y.astype(np.int64)
    assert set(Y.tolist()) == set(
        range(n_classes)
    ), "Classification labels must form the range [0, 1, ..., n_classes - 1]"

data_numpy = {
    "train": {"x_cat": X_b['train'].to_numpy().astype(np.int64), "y": Y[train_idx]},
    "val": {"x_cat": X_b['val'].to_numpy().astype(np.int64), "y": Y[val_idx]},
    "test": {"x_cat": X_b['test'].to_numpy().astype(np.int64), "y": Y[test_idx]},
}

[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]


In [4]:
if task_type == "regression":
    Y_mean = data_numpy["train"]["y"].mean().item()
    Y_std = data_numpy["train"]["y"].std().item()
    for part in data_numpy:
        data_numpy[part]["y"] = (data_numpy[part]["y"] - Y_mean) / Y_std

# >>> Convert data to tensors.
data = {
    part: {'y': torch.as_tensor(data_numpy[part]['y'], device=device)}
    for part in data_numpy
}

if True:
    data["train"]["x_cat"] = torch.from_numpy(data_numpy["train"]["x_cat"]).to(torch.int64).to(device)
    data["val"]["x_cat"] = torch.from_numpy(data_numpy["val"]["x_cat"]).to(torch.int64).to(device)
    data["test"]["x_cat"] = torch.from_numpy(data_numpy["test"]["x_cat"]).to(torch.int64).to(device)

if task_type != "multiclass":
    # Required by F.binary_cross_entropy_with_logits
    for part in data:
        data[part]["y"] = data[part]["y"].float()

In [5]:
with open('predictions/ftt_bin_depth3_preds.json') as json_file:
    bin_preds = json.load(json_file)

with open('predictions/ftt_orig_preds.json') as json_file:
    orig_preds = json.load(json_file)

In [6]:
for s in bin_preds.keys():
    for pt in bin_preds[s].keys():
        bin_preds[s][pt] = np.array(bin_preds[s][pt])

In [7]:
for s in orig_preds.keys():
    for pt in orig_preds[s].keys():
        orig_preds[s][pt] = np.array(orig_preds[s][pt])

In [8]:
def dict_to_series(preds):
    ret = []
    for i in ['train', 'val', 'test']:
        ret.append(pd.Series(preds[i]['y_hat'], name = 'MedHouseVal'))
    return tuple(ret)

In [9]:
y_train_bin, y_val_bin, y_test_bin = dict_to_series(bin_preds)

In [10]:
y_train_orig, y_val_orig, y_test_orig = dict_to_series(orig_preds)

In [11]:
#train: (bin, bin) + (bin, orig)
#val: (bin, bin) + (bin, orig)
#train val: (bin, bin) + (bin, orig)

In [12]:
n_inter = 3
k_cv = 2

In [13]:
ftd_bo_train = FTDistillCV(size_interactions = n_inter, k_cv = k_cv)
ftd_bb_train = FTDistillCV(size_interactions = n_inter, k_cv = k_cv)
ftd_bo_val = FTDistillCV(size_interactions = n_inter, k_cv = k_cv)
ftd_bb_val = FTDistillCV(size_interactions = n_inter , k_cv = k_cv)
ftd_bo_tv = FTDistillCV(size_interactions = n_inter, k_cv = k_cv)
ftd_bb_tv = FTDistillCV(size_interactions = n_inter, k_cv = k_cv)

In [14]:
ftd_bo_train.fit(X_b['train'], y_train_orig, bt.no_interaction)
print('bo_train concluded')
ftd_bb_train.fit(X_b['train'], y_train_bin, bt.no_interaction)
print('bb_train concluded')
ftd_bo_val.fit(X_b['val'], y_val_orig, bt.no_interaction)
print('bo_val concluded')
ftd_bb_val.fit(X_b['val'], y_val_bin, bt.no_interaction)
print('bb_val concluded')
ftd_bo_tv.fit(pd.concat([X_b['train'], X_b['val']], axis = 0), pd.concat([y_train_orig, y_val_orig], axis = 0), bt.no_interaction)
print('bo_tv concluded')
ftd_bb_tv.fit(pd.concat([X_b['train'], X_b['val']], axis = 0), pd.concat([y_train_bin, y_val_bin], axis = 0), bt.no_interaction)
print('bb_tv concluded')

  sol = celer(
  sol = celer(
  sol = celer(


bo_train concluded


  sol = celer(
  sol = celer(
  sol = celer(


bb_train concluded
bo_val concluded


  sol = celer(


bb_val concluded
bo_tv concluded


  sol = celer(


bb_tv concluded


  sol = celer(


In [35]:
from sklearn.metrics import mean_squared_error, r2_score

r2_score(ftd_bo_tv.predict(X_b['val']), orig_preds['val']['y_hat'])

0.8612636716770935

In [37]:
ftd_list = [ftd_bo_train, ftd_bb_train, ftd_bo_val, ftd_bb_val, ftd_bo_tv, ftd_bb_tv]
ftd_names = ['(bin, orig, train)', '(bin, bin, train)', '(bin, orig, val)', '(bin, bin, val)', '(bin, orig, train+val)', '(bin, bin, train+val)']

In [39]:
r2_true_df = pd.DataFrame(columns = ['Model', 'Train R2', 'Val R2', 'Test R2'])

In [40]:
for i, j in zip(['train', 'val', 'test'], ['Train R2', 'Val R2', 'Test R2']):
    r2_true_df[j] = [r2_score(m.predict(X_b[i]), orig_preds[i]['y_true']) for m in ftd_list]

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


In [42]:
r2_true_df['Model'] = ftd_names

In [47]:
r2_true_df.loc[len(r2_true_df)] = ['FTTransformer']+[r2_score(orig_preds[i]['y_hat'], orig_preds[i]['y_true']) for i in ['train', 'val', 'test']]

In [49]:
r2_true_df.to_csv('r2/bin_depth3_distillation_true_R2.csv')

In [50]:
r2_hat_df = pd.DataFrame(columns = ['Model', 'Train R2', 'Val R2', 'Test R2'])

In [54]:
for i, j in zip(['train', 'val', 'test'], ['Train R2', 'Val R2', 'Test R2']):
    r2_hat_df[j] = [r2_score(m.predict(X_b[i]), orig_preds[i]['y_hat']) for m in ftd_list]

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


In [55]:
r2_hat_df['Model'] = ftd_names

In [57]:
r2_hat_df.loc[len(r2_hat_df)] = ['FTTransformer']+[r2_score(orig_preds[i]['y_hat'], orig_preds[i]['y_hat']) for i in ['train', 'val', 'test']]

In [58]:
r2_hat_df

Unnamed: 0,Model,Train R2,Val R2,Test R2
0,"(bin, orig, train)",0.847424,0.832229,0.817176
1,"(bin, bin, train)",0.809716,0.821329,0.805811
2,"(bin, orig, val)",0.759469,0.842314,0.77407
3,"(bin, bin, val)",0.796792,0.825923,0.799527
4,"(bin, orig, train+val)",0.841185,0.861264,0.821455
5,"(bin, bin, train+val)",0.808141,0.823432,0.8059
6,FTTransformer,1.0,1.0,1.0


In [59]:
r2_hat_df.to_csv('r2/bin_depth3_distillation_hat_R2.csv')