In [1]:
# ruff: noqa: E402
import math
import warnings
from typing import Dict, Literal
import time

warnings.simplefilter("ignore")
import delu  # Deep Learning Utilities: https://github.com/Yura52/delu
import numpy as np
import scipy.special
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
import torch
import torch.nn.functional as F
import torch.optim
from torch import Tensor
from tqdm.std import tqdm
import json
import sys

warnings.resetwarnings()

from rtdl_revisiting_models import MLP, ResNet, FTTransformer

sys.path.append('..')

from interpretDistill.fourierDistill import *
from interpretDistill.binaryTransformer import *

In [18]:
n_inter = 3
k_cv = 3

In [19]:
dataset = sklearn.datasets.fetch_california_housing(as_frame = True)
X: np.ndarray = dataset["data"]
Y: np.ndarray = dataset["target"]

all_idx = np.arange(len(Y))
train_idx, test_idx = sklearn.model_selection.train_test_split(
    all_idx, train_size=0.8, random_state = 0
)
# train_idx, val_idx = sklearn.model_selection.train_test_split(
#     trainval_idx, train_size=0.8, random_state = 0
# )

In [20]:
bt_bin3 = BinaryTransformer(depth = 3, bit = False)
X_train_bin3 = bt_bin3.fit_and_transform(X.loc[train_idx, :], Y.loc[train_idx])
X_test_bin3 = bt_bin3.transform(X.loc[test_idx, :])

In [21]:
X_train_bin3.shape

(16512, 61)

In [22]:
bt_bit3 = BinaryTransformer(depth = 3, bit = True)
X_train_bit3 = bt_bit3.fit_and_transform(X.loc[train_idx, :], Y.loc[train_idx])
X_test_bit3 = bt_bit3.transform(X.loc[test_idx, :])

In [23]:
X_train_bit3.shape

(16512, 24)

In [24]:
bt_bit4 = BinaryTransformer(depth = 4, bit = True)
X_train_bit4 = bt_bit4.fit_and_transform(X.loc[train_idx, :], Y.loc[train_idx])
X_test_bit4 = bt_bit4.transform(X.loc[test_idx, :])

In [25]:
X_train_bit4.shape

(16512, 32)

In [26]:
y_train = Y.loc[train_idx]
y_test = Y.loc[test_idx]

In [27]:
train_time = []

In [28]:
ftd_bin3 = FTDistillCV(size_interactions = n_inter, k_cv = k_cv)
ftd_bit3 = FTDistillCV(size_interactions = n_inter, k_cv = k_cv)
ftd_bit4 = FTDistillCV(size_interactions = n_inter, k_cv = k_cv)

start = time.time()
ftd_bin3.fit(X_train_bin3, y_train, bt_bin3.no_interaction)
end = time.time()
train_time.append(end - start)
start = time.time()
ftd_bit3.fit(X_train_bit3, y_train)
end = time.time()
train_time.append(end - start)
start = time.time()
ftd_bit4.fit(X_train_bit4, y_train)
end = time.time()
train_time.append(end - start)

In [29]:
from sklearn.metrics import r2_score

In [30]:
model_list = [ftd_bin3, ftd_bit3, ftd_bit4]
model_names = ['(bin3, true, train)', '(bit3, true, train)', '(bit4, true, train)']

r2_df = pd.DataFrame()
r2_df['Model'] = model_names

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


In [31]:
r2_df['Train R2'] = [r2_score(ftd_bin3.predict(X_train_bin3), y_train),r2_score(ftd_bit3.predict(X_train_bit3), y_train), r2_score(ftd_bit4.predict(X_train_bit4), y_train)]
r2_df['Test R2'] = [r2_score(ftd_bin3.predict(X_test_bin3), y_test),r2_score(ftd_bit3.predict(X_test_bit3), y_test), r2_score(ftd_bit4.predict(X_test_bit4), y_test)]

In [32]:
r2_df['Train Time'] = train_time
r2_df['Total Num Features'] = [len(m.regression_model.coef_) for m in model_list]
r2_df['Num Selected Features'] = [sum(m.regression_model.coef_ != 0) for m in model_list]

In [33]:
r2_df

Unnamed: 0,Model,Train R2,Test R2,Train Time,Total Num Features,Num Selected Features
0,"(bin3, true, train)",0.707876,0.621138,288.776101,26326,999
1,"(bit3, true, train)",0.674444,0.589791,25.889728,2325,629
2,"(bit4, true, train)",0.74941,0.429176,55.189299,5489,1428


In [34]:
r2_df.to_csv('r2/binarize_prediction.csv')

In [36]:
[m.regression_model.reg_param for m in model_list]

AttributeError: 'ElasticNetCV' object has no attribute 'reg_param'

In [40]:
ftd_bit4.regression_model.alpha_

0.004144997917877904