In [1]:
# ruff: noqa: E402
import math
import warnings
from typing import Dict, Literal
import time

warnings.simplefilter("ignore")
import delu  # Deep Learning Utilities: https://github.com/Yura52/delu
import numpy as np
import scipy.special
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing
import torch
import torch.nn.functional as F
import torch.optim
from torch import Tensor
from tqdm.std import tqdm
import json
import sys
from sklearn.metrics import r2_score
warnings.resetwarnings()

from rtdl_revisiting_models import MLP, ResNet, FTTransformer

from fourierDistill import *
from featurizer import BinaryTransformer
from subset_predictors import *

In [2]:
from mdlp.discretization import MDLP

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  min_split=1e-3, dtype=np.int):


In [3]:
dataset = sklearn.datasets.fetch_california_housing(as_frame = True)
#dataset = sklearn.datasets.load_iris(as_frame = True)
X: np.ndarray = dataset["data"]
Y: np.ndarray = dataset["target"]

all_idx = np.arange(len(Y))
train_idx, test_idx = sklearn.model_selection.train_test_split(
    all_idx, train_size=0.8, random_state = 0
)
# train_idx, val_idx = sklearn.model_selection.train_test_split(
#     trainval_idx, train_size=0.8, random_state = 0
# )
X_train = X.loc[train_idx]
X_test = X.loc[test_idx]
y_train = Y.loc[train_idx]
y_test = Y.loc[test_idx]

In [4]:
n_inter = 3
k_cv = 3

In [5]:
bt_bin3 = BinaryTransformer(depth = 3, bit = False)
X_train_bin3 = bt_bin3.fit_and_transform(X.loc[train_idx, :], Y.loc[train_idx])
X_test_bin3 = bt_bin3.transform(X.loc[test_idx, :])

In [29]:
import xgboost as xgb

model = xgb.XGBRegressor()

# Train the model
model.fit(X_train_bin3, y_train)

# Predict on the test se

In [51]:
ftd_bin3 = FTDistillRegressorCV(
                 pre_interaction='l0l2', 
                 pre_lam1=0.01, 
                 pre_lam2=0.01,
                 pre_max_features=0.7,
                 post_interaction='l0l2', 
                 post_lam1=0.1, 
                 post_lam2=0.1,
                 post_max_features=40,
                 size_interactions=3)


start = time.time()
ftd_bin3.fit(X_train_bin3, pd.Series(model.predict(X_train_bin3).astype(np.float64)), bt_bin3.no_interaction)
end = time.time()

42
l0              0.000037
support_size          37
intercept       6.980521
converged           True
l2                 0.001
Name: 255, dtype: object
Selected features: Index(['MedInc_3', 'MedInc_4', 'MedInc_6', 'MedInc_10', 'MedInc_11',
       'MedInc_13', 'MedInc_14', 'HouseAge_3', 'HouseAge_4', 'HouseAge_7',
       'HouseAge_8', 'AveRooms_10', 'AveRooms_11', 'AveRooms_13',
       'AveBedrms_3', 'AveBedrms_4', 'AveBedrms_11', 'AveBedrms_13',
       'Population_6', 'Population_14', 'AveOccup_3', 'AveOccup_4',
       'AveOccup_6', 'AveOccup_7', 'Latitude_3', 'Latitude_6', 'Latitude_10',
       'Latitude_11', 'Latitude_13', 'Latitude_14', 'Longitude_3',
       'Longitude_4', 'Longitude_6', 'Longitude_7', 'Longitude_10',
       'Longitude_11', 'Longitude_13'],
      dtype='object')
Post-interaction model fitting
(16512, 5836)
40
l0              0.000351
support_size          36
intercept       3.115394
converged           True
l2                  0.01
Name: 131, dtype: object
Re-fitti

In [52]:
ftd_bin3.post_interaction_features

Index([                                 ('MedInc_3',),
                          ('Latitude_14', 'MedInc_6'),
                         ('MedInc_10', 'Longitude_4'),
                        ('MedInc_10', 'Longitude_13'),
                         ('Longitude_6', 'MedInc_13'),
                       ('AveBedrms_11', 'AveOccup_6'),
                       ('AveOccup_7', 'Longitude_13'),
                        ('Latitude_3', 'Longitude_7'),
                       ('Latitude_10', 'Longitude_4'),
           ('Latitude_14', 'MedInc_3', 'AveBedrms_3'),
           ('Latitude_14', 'MedInc_3', 'AveBedrms_4'),
             ('MedInc_3', 'Latitude_6', 'AveOccup_4'),
            ('Latitude_11', 'HouseAge_3', 'MedInc_4'),
          ('Latitude_14', 'Population_6', 'MedInc_4'),
          ('Longitude_11', 'Latitude_6', 'MedInc_10'),
           ('MedInc_11', 'Longitude_4', 'HouseAge_3'),
            ('MedInc_11', 'HouseAge_4', 'AveOccup_4'),
           ('MedInc_11', 'AveOccup_4', 'AveRooms_11'),
         (

In [56]:
r2_score(model.predict(X_train_bin3), y_train), r2_score(model.predict(X_test_bin3), y_test)

(0.7571307197752932, 0.6153546578316168)

In [57]:
r2_score(ftd_bin3.predict(X_train_bin3), y_train), r2_score(ftd_bin3.predict(X_test_bin3), y_test)

(0.5873351037872823, 0.5363796262189857)

In [58]:
r2_score(ftd_bin3.predict(X_train_bin3), model.predict(X_train_bin3).astype(np.float64)), r2_score(ftd_bin3.predict(X_test_bin3), model.predict(X_test_bin3).astype(np.float64))

(0.8992051757397239, 0.910040161082565)

In [9]:
X_train_bin3.values.astype(np.float64).shape

(16512, 61)

In [10]:
fit_model_2 = l0learn.fit(X_train_bin3.values.astype(np.float64),y_train.to_numpy(), penalty="L0L2", num_gamma = 5, max_support_size=3)

In [11]:
cv_fit_result = l0learn.cvfit(X_train_bin3.values.astype(np.float64),y_train.to_numpy(), penalty='L0L2', num_gamma = 5,seed=5, max_support_size=3)

In [12]:
gamma_mins = [(i, np.argmin(cv_mean), np.min(cv_mean)) for i, cv_mean in enumerate(cv_fit_result.cv_means)]
gamma_mins

[(0, 0, 19.184006849660374),
 (1, 5, 22.858569135606952),
 (2, 6, 24.578261227058643),
 (3, 6, 24.62042985435774),
 (4, 6, 24.621207457916515)]

In [13]:
fit_model_2

Unnamed: 0,l0,support_size,intercept,converged,l2
0,0.004457,0,2.072499,True,10.0
1,0.004412,1,2.130273,True,10.0
2,0.003092,1,2.130273,True,10.0
3,0.002474,4,2.124668,True,10.0
4,0.044053,0,2.072499,True,0.562341
5,0.043612,1,2.643524,True,0.562341
6,0.030561,1,2.643524,True,0.562341
7,0.024448,2,2.458954,True,0.562341
8,0.023715,2,2.458954,True,0.562341
9,0.018972,3,2.339104,True,0.562341


In [14]:
optimal_gamma_index, optimal_lambda_index, min_error = min(gamma_mins, key = lambda t: t[2])
print(optimal_gamma_index, optimal_lambda_index, min_error)
print("Optimal lambda = ", fit_model_2.lambda_0[optimal_gamma_index][optimal_lambda_index])

0 0 19.184006849660374
Optimal lambda =  0.004457064969828327


In [15]:
cv_fit_result.coeff(lambda_0=fit_model_2.lambda_0[optimal_gamma_index][optimal_lambda_index],
                    gamma=fit_model_2.gamma[optimal_gamma_index]).toarray()

array([[2.07249896],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.   

In [16]:
gamma_mins = [(i, np.argmin(cv_mean), np.min(cv_mean)) for i, cv_mean in enumerate(cv_fit_result.cv_means)]
gamma_mins

[(0, 0, 19.184006849660374),
 (1, 5, 22.858569135606952),
 (2, 6, 24.578261227058643),
 (3, 6, 24.62042985435774),
 (4, 6, 24.621207457916515)]

In [17]:
cv_fit_result = l0learn.cvfit(X_train_bin3.values.astype(np.float64),y_train.to_numpy(), penalty='L0L2')

In [18]:
gamma_mins = [(i, np.argmax(cv_mean), np.max(cv_mean)) for i, cv_mean in enumerate(cv_fit_result.cv_means)]
gamma_mins

[(0, 56, 23.05662736837317)]

In [19]:
fit_model_2 = l0learn.fit(X_train_bin3.values.astype(np.float64),y_train.to_numpy(), penalty="L0L2", num_gamma = 5, gamma_min = 0.0001, gamma_max = 10, max_support_size=20)

In [20]:
df = fit_model_2.characteristics()
scores = []
for i in range(229):
    t = df.iloc[i, :]
    scores.append(np.mean((y_train-fit_model_2.predict(x = X_train_bin3, lambda_0 = t['l0'], gamma=t['l2']).reshape(-1, ))**2))

IndexError: single positional indexer is out-of-bounds

In [None]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Initialize KFold
kf = KFold(n_splits=5)

# List to store MSE for each fold
mse_scores = []

# Perform k-fold cross-validation
for train_index, test_index in kf.split(X_train_bin3):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    # Train the model
    model.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = model.predict(X_test)
    
    # Calculate MSE
    mse = mean_squared_error(y_test, y_pred)
    mse_scores.append(mse)

# Calculate average MSE over all folds
average_mse = np.mean(mse_scores)
print(f'MSE scores for each fold: {mse_scores}')
print(f'Average MSE: {average_mse}')


In [None]:
scores

In [None]:
optimal_gamma_index, optimal_lambda_index, min_error = max(gamma_mins, key = lambda t: t[2])
print(optimal_gamma_index, optimal_lambda_index, min_error)
print("Optimal lambda = ", fit_model_2.lambda_0[optimal_gamma_index][optimal_lambda_index])

In [None]:
np.mean((2.07249896 - y_train.to_numpy())**2)

In [None]:
cv_fit_result.coeff(lambda_0=fit_model_2.lambda_0[optimal_gamma_index][optimal_lambda_index],
                    gamma=fit_model_2.gamma[optimal_gamma_index]).toarray()

In [None]:
import numpy as np
from celer import ElasticNet
from subset_predictors import *

# Generating some random data for demonstration
np.random.seed(0)
n_samples, n_features = 50, 200
X = np.random.randn(n_samples, n_features)
y = np.random.randn(n_samples)

# Fitting the ElasticNet model
alpha = 0.1  # Regularization strength
l1_ratio = 0.5  # Ratio between L1 and L2 regularization (0 for Ridge, 1 for Lasso)
model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=True)
l0l2 = L0L2Regressor()
l0l2.fit(X, y)
model.fit(X, y)

# Getting the coefficients
coef = model.coef_

# Printing the number of non-zero coefficients
non_zero_coef_count = np.sum(coef != 0)
print("Number of non-zero coefficients:", non_zero_coef_count)


In [None]:
model.predict(X)

In [None]:
len(l0l2.coef_ != 0)

In [None]:
len(coef)

In [None]:
ftd_bin3.post_interaction_model.C

In [None]:
ftd_bin3.post_interaction_model.C

In [None]:
np.mean(ftd_bin3.predict(X_test_bin3)== y_test)

In [None]:
from sklearn.metrics import r2_score

In [None]:
r2_score(ftd_bin3.predict(X_train_bin3), y_train)

In [None]:
r2_score(ftd_bin3.predict(X_test_bin3), y_test)

In [None]:
m = MDLP()

In [None]:
X_train_d = m.fit_transform(X_train, y_train)

In [None]:
X_test_d = m.transform(X_test)

In [None]:
X_test_d

In [None]:
ftd_bin3.post_lam2

In [None]:
from sklearn.metrics import r2_score

In [None]:
model_list = [ftd_bin3, ftd_bit3, ftd_bit4]
model_names = ['(bin3, true, train)', '(bit3, true, train)', '(bit4, true, train)']

r2_df = pd.DataFrame()
r2_df['Model'] = model_names

In [None]:
r2_df['Train R2'] = [r2_score(ftd_bin3.predict(X_train_bin3), y_train),r2_score(ftd_bit3.predict(X_train_bit3), y_train), r2_score(ftd_bit4.predict(X_train_bit4), y_train)]
r2_df['Test R2'] = [r2_score(ftd_bin3.predict(X_test_bin3), y_test),r2_score(ftd_bit3.predict(X_test_bit3), y_test), r2_score(ftd_bit4.predict(X_test_bit4), y_test)]

In [None]:
r2_df['Train Time'] = train_time
r2_df['Total Num Features'] = [len(m.regression_model.coef_) for m in model_list]
r2_df['Num Selected Features'] = [sum(m.regression_model.coef_ != 0) for m in model_list]

In [None]:
r2_df

In [None]:
r2_df.to_csv('r2/binarize_prediction.csv')

In [None]:
[m.regression_model.reg_param for m in model_list]

In [None]:
ftd_bit4.regression_model.alpha_

In [None]:
from subset_predictors import *
l0l2cv = L0L2RegressorCV()
l0l2cv.fit(X, Y)

l0l2 = L0L2Regressor()
l0l2.fit(X, Y)

In [None]:
# Import necessary libraries
import numpy as np
#import l0learn
from subset_predictors import *

# Generate synthetic data
np.random.seed(0)
n_samples, n_features = 100, 20
X = np.random.randn(n_samples, n_features)
true_coef = np.zeros(n_features)
true_coef[::2] = 1  # Only every other feature is relevant
y = np.dot(X, true_coef) + np.random.normal(size=n_samples)

# Fit the model using l0learn
l0l2cv = L0L2RegressorCV()
l0l2cv.fit(X, y)

l0l2 = L0L2Regressor()
l0l2.fit(X, y)

# Print the fitted coefficients for the best model
print("Fitted coefficients:")
#print(fit.coef_)

# # Make predictions
# X_new = np.random.randn(10, n_features)  # New data for prediction
# predictions = l0learn_predict(fit, X_new)
# print("Predictions:")
# print(predictions)


In [None]:
l0l2.best_lambda, l0l2cv.best_lambda

In [None]:
import l0learn

fitl2 = l0learn.cvfit(X, y, penalty = 'L0L2')
fitl0 = l0learn.cvfit(X, y, penalty = 'L0')

In [None]:
fitl2.cv_means[0].shape, fitl0.cv_means[0].shape

In [None]:
X.shape

In [None]:
gamma_mins = [(i, np.argmin(cv_mean), np.min(cv_mean)) for i, cv_mean in enumerate(fit.cv_means)]
gamma_mins

In [None]:
optimal_gamma_index, optimal_lambda_index, min_error = min(gamma_mins, key = lambda t: t[2])

In [None]:
fit.coeff(lambda_0=fit.lambda_0[optimal_gamma_index][optimal_lambda_index],
                    gamma=fit.gamma[optimal_gamma_index]).toarray().reshape(-1, )

In [None]:
len(fit.intercepts), len(fit.intercepts[0])

In [None]:
np.asarray(fit.coeff().todense())[:, 44]

In [None]:
from importlib.metadata import version
version('l0learn')

In [None]:
fit.characteristics().to_dict()['l0']

In [None]:
fit.characteristics().to_dict()['l2']

In [None]:
np.asarray(fit.coeff().todense())

In [None]:
fit

In [None]:
fit.coeff(lambda_0=0.079901, gamma=0).toarray()

In [None]:
fit.coeff(lambda_0 = 0.079901, gamma = 0)

In [None]:
import numpy as np
np.random.seed(4) # fix the seed to get a reproducible result
n, p, k = 500, 1000, 10
X = np.random.normal(size=(n, p))
B = np.zeros(p)
B[:k] = 1
e = np.random.normal(size=(n,))/2
y = X@B + e

In [None]:
fit_model = l0learn.fit(X, y, penalty="L0", max_support_size=20)

In [None]:
fit_model

In [None]:
fit_model_2 = l0learn.fit(X, y, penalty="L0L2", num_gamma = 5, gamma_min = 0.0001, gamma_max = 10, max_support_size=5)

In [None]:
df = fit_model_2.characteristics()
stats = df[df['support_size'] < 5].sort_values('support_size', ascending = False).iloc[0, :]

In [None]:
stats['l0']

In [None]:
fit_model_2.predict(X, lambda_0=0.0016, gamma=10).reshape(-1, )

In [None]:
X['MedInc'].to_numpy()

In [None]:
import numpy as np
from l0bnb import fit_path
from l0bnb import gen_synthetic

"""
For demonstration, we first generate a synthetic regression dataset (X,y)
as follows: y = X*b + epsilon, where the true vector of coefficients b
is sparse and has only 10 nonzero entries.
We set the number of samples n=1000 and number of features p=10,000.
"""
X, y, b = gen_synthetic(n=1000, p=10000, supp_size=10)
print("Nonzero indices in b: ", np.nonzero(b)[0])

"""
Run L0BnB to solve the problem for a sequence of lambda_0's.
By default, the sequence of lambda_0's is automatically chosen by the toolkit.
Use max_nonzeros=10 to stop the regularization path when it exceeds 10 nonzeros.
Here we fix lambda_2 = 0.01 (generally, this is data-dependent).
"""
sols = fit_path(X, y, lambda_2 = 0.01, max_nonzeros = 10)

"""
sols is a list of solutions, each corresponding to a different lambda_0.
Below we inspect the solution with index 4.
The estimated coefficients vector "b_estimated" and the intercept term can be accessed as follows:
"""
b_estimated = sols[4]["B"] # a numpy array.
intercept = sols[4]["B0"]

# To check the nonzero indices in b_estimated:
print("Nonzero indices in b_estimated: ", np.nonzero(b_estimated)[0])
# The nonzero indices in b_estimated match that of b.

# Predictions on the training data can be made as follows:
y_estimated = np.dot(X, b_estimated) + intercept

# For more advanced usage, check the documentation of fit_path:
print(fit_path.__doc__)


In [None]:
import numpy as np
from irf import irf_utils
from irf.ensemble import RandomForestClassifierWithWeights
