In [1]:
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

import sys
import os

sys.path.append('..')

from interpretDistill.fourierDistill import *
from interpretDistill.binaryTransformer import *

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

from ucimlrepo import fetch_ucirepo 
solar_flare = fetch_ucirepo(id=89) 
  
X = solar_flare.data.features 
y = solar_flare.data.targets['common flares']

X.columns = X.columns.str.replace(' ', '_')
y.name = 'common_flares'
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

In [3]:
bt_bit = BinaryTransformer(depth = 4, bit = True)
bt_bin = BinaryTransformer(depth = 4, bit = False)

In [4]:
X_train.shape, X_val.shape, X_test.shape

((833, 10), (278, 10), (278, 10))

In [5]:
X_train_bit = bt_bit.fit_and_transform(X_train, y_train)
X_val_bit = bt_bit.transform(X_val)
X_test_bit = bt_bit.transform(X_test)

In [6]:
X_train_bit.isnull().values.any()

False

In [7]:
X_train_bin = bt_bin.fit_and_transform(X_train, y_train)
X_val_bin = bt_bin.transform(X_val)
X_test_bin = bt_bin.transform(X_test)

In [8]:
X_train_bin.isnull().values.any()

False

In [9]:
X_train_orig = pd.get_dummies(X_train, columns=list(X.columns)).astype(int).replace({-1:-1, 0:-1, 1:1})
X_val_orig = pd.get_dummies(X_val, columns=list(X.columns)).astype(int).replace({-1:-1, 0:-1, 1:1})
X_test_orig = pd.get_dummies(X_test, columns=list(X.columns)).astype(int).replace({-1:-1, 0:-1, 1:1})

In [10]:
X_train_orig.isnull().values.any()

False

In [11]:
print(f'dummy (orig) number of features: {X_train_orig.shape[1]}')
print(f'bit number of features: {X_train_bit.shape[1]}')
print(f'bin number of features: {X_train_bin.shape[1]}')

dummy (orig) number of features: 32
bit number of features: 25
bin number of features: 27


In [12]:
X_train_orig = pd.get_dummies(X_train, columns=list(X.columns)).astype(int).replace({-1:-1, 0:-1, 1:1})
X_val_orig = pd.get_dummies(X_val, columns=list(X.columns)).astype(int).replace({-1:-1, 0:-1, 1:1})
X_test_orig = pd.get_dummies(X_test, columns=list(X.columns)).astype(int).replace({-1:-1, 0:-1, 1:1})

In [13]:
rf_orig = RandomForestRegressor(max_depth = 6, criterion = 'absolute_error')
rf_bit = RandomForestRegressor(max_depth = 6, criterion = 'absolute_error')
rf_bin = RandomForestRegressor(max_depth = 6, criterion = 'absolute_error')

In [14]:
# rf_orig.fit(X_train_orig, y_train)
# rf_bit.fit(X_train_bit, y_train)
# rf_bin.fit(X_train_bin, y_train)
rf_orig.fit(X_val_orig, y_val)
rf_bit.fit(X_val_bit, y_val)
rf_bin.fit(X_val_bin, y_val)

In [15]:
print(f'[orig] train MSE: {mean_squared_error(rf_orig.predict(X_train_orig),y_train)}, val MSE: {mean_squared_error(rf_orig.predict(X_val_orig),y_val)}, test MSE: {mean_squared_error(rf_orig.predict(X_test_orig),y_test)}')
print(f'[bit] train MSE: {mean_squared_error(rf_bit.predict(X_train_bit),y_train)}, val MSE: {mean_squared_error(rf_bit.predict(X_val_bit),y_val)}, test MSE: {mean_squared_error(rf_bit.predict(X_test_bit),y_test)}')
print(f'[bin] train MSE: {mean_squared_error(rf_bin.predict(X_train_bin),y_train)}, val MSE: {mean_squared_error(rf_bin.predict(X_val_bin),y_val)}, test MSE: {mean_squared_error(rf_bin.predict(X_test_bin),y_test)}')

[orig] train MSE: 0.5983678871548619, val MSE: 0.3154395683453237, test MSE: 0.5097756294964029
[bit] train MSE: 0.5893192076830733, val MSE: 0.30029685251798555, test MSE: 0.49097913669064747
[bin] train MSE: 0.6037669267707083, val MSE: 0.3197348021582734, test MSE: 0.5007365107913668


In [16]:
print(f'[orig RF] train R2: {r2_score(rf_orig.predict(X_train_orig),y_train)}, val R2: {r2_score(rf_orig.predict(X_val_orig),y_val)}, test R2: {r2_score(rf_orig.predict(X_test_orig),y_test)}')
print(f'[bit RF] train R2: {r2_score(rf_bit.predict(X_train_bit),y_train)}, val R2: {r2_score(rf_bit.predict(X_val_bit),y_val)}, test R2: {r2_score(rf_bit.predict(X_test_bit),y_test)}')
print(f'[bin RF] train R2: {r2_score(rf_bin.predict(X_train_bin),y_train)}, val R2: {r2_score(rf_bin.predict(X_val_bin),y_val)}, test R2: {r2_score(rf_bin.predict(X_test_bin),y_test)}')

[orig RF] train R2: -7.8267999626379705, val R2: -3.3563320769325555, test R2: -5.740792990588139
[bit RF] train R2: -6.334008505684093, val R2: -2.568303239713314, test R2: -5.815169764126726
[bin RF] train R2: -6.911864637693347, val R2: -3.3701343159161894, test R2: -4.953216550323675


In [17]:
ftd_orig = FTDistillCV(size_interactions = 3)
ftd_bit = FTDistillCV(size_interactions = 3)
ftd_bin = FTDistillCV(size_interactions = 3)

In [18]:
# ftd_bit.fit(X_val_bit, rf_bit.predict(X_val_bit)) #rf_orig.predict(X_val))
# ftd_bin.fit(X_val_bin, rf_bin.predict(X_val_bin), bt_bin.no_interaction) #rf_orig.predict(X_val), bt_bin.no_interaction)
ftd_orig.fit(X_train_orig, rf_orig.predict(X_train_orig))
ftd_bit.fit(X_train_bit, rf_bit.predict(X_train_bit)) #rf_orig.predict(X_val))
ftd_bin.fit(X_train_bin, rf_bin.predict(X_train_bin), bt_bin.no_interaction) #rf_orig.predict(X_val), bt_bin.no_interaction)

<interpretDistill.fourierDistill.FTDistillCV at 0x7f2c72480b50>

In [19]:
sum(ftd_orig.regression_model.coef_ != 0), sum(ftd_bit.regression_model.coef_ != 0), sum(ftd_bin.regression_model.coef_ != 0)

(497, 341, 283)

In [20]:
print(f'[orig FTD, true y] train MSE: {mean_squared_error(ftd_orig.predict(X_train_orig),y_train)}, val MSE: {mean_squared_error(ftd_orig.predict(X_val_orig),y_val)}, test MSE: {mean_squared_error(ftd_orig.predict(X_test_orig),y_test)}')
print(f'[orig FTD, RF y] train MSE: {mean_squared_error(ftd_orig.predict(X_train_orig),rf_orig.predict(X_train_orig))}, val MSE: {mean_squared_error(ftd_orig.predict(X_val_orig),rf_orig.predict(X_val_orig))}, test MSE: {mean_squared_error(ftd_orig.predict(X_test_orig),rf_orig.predict(X_test_orig))}')
print(f'[bit FTD, true y] train MSE: {mean_squared_error(ftd_bit.predict(X_train_bit),y_train)}, val MSE: {mean_squared_error(ftd_bit.predict(X_val_bit),y_val)}, test MSE: {mean_squared_error(ftd_bit.predict(X_test_bit),y_test)}')
print(f'[bit FTD, RF y] train MSE: {mean_squared_error(ftd_bit.predict(X_train_bit),rf_bit.predict(X_train_bit))}, val MSE: {mean_squared_error(ftd_bit.predict(X_val_bit),rf_bit.predict(X_val_bit))}, test MSE: {mean_squared_error(ftd_bit.predict(X_test_bit),rf_bit.predict(X_test_bit))}')
print(f'[bin FTD, true y] train MSE: {mean_squared_error(ftd_bin.predict(X_train_bin),y_train)}, val MSE: {mean_squared_error(ftd_bin.predict(X_val_bin),y_val)}, test MSE: {mean_squared_error(ftd_bin.predict(X_test_bin),y_test)}')
print(f'[bin FTD, RF y] train MSE: {mean_squared_error(ftd_bin.predict(X_train_bin),rf_bin.predict(X_train_bin))}, val MSE: {mean_squared_error(ftd_bin.predict(X_val_bin),rf_bin.predict(X_val_bin))}, test MSE: {mean_squared_error(ftd_bin.predict(X_test_bin),rf_bin.predict(X_test_bin))}')

[orig FTD, true y] train MSE: 0.5982423641942997, val MSE: 0.3242000959778041, test MSE: 0.5116333957184148
[orig FTD, RF y] train MSE: 2.2417318655605713e-05, val MSE: 0.001957781308415727, test MSE: 0.0020170178364034867
[bit FTD, true y] train MSE: 0.5883848043145837, val MSE: 0.317541541658904, test MSE: 0.48800177295305197
[bit FTD, RF y] train MSE: 0.0005690878985117301, val MSE: 0.002778836645009651, test MSE: 0.0035908204401600917
[bin FTD, true y] train MSE: 0.6033780988080844, val MSE: 0.3262531418857889, test MSE: 0.5054690569526445
[bin FTD, RF y] train MSE: 2.0400169937470757e-05, val MSE: 0.0017656864422279483, test MSE: 0.0021323978777435006


In [21]:
print(f'[orig FTD, true y] train R2: {r2_score(ftd_orig.predict(X_train_orig),y_train)}, val R2: {r2_score(ftd_orig.predict(X_val_orig),y_val)}, test R2: {r2_score(ftd_orig.predict(X_test_orig),y_test)}')
print(f'[orig FTD, RF y] train R2: {r2_score(ftd_orig.predict(X_train_orig),rf_orig.predict(X_train_orig))}, val R2: {r2_score(ftd_orig.predict(X_val_orig),rf_orig.predict(X_val_orig))}, test R2: {r2_score(ftd_orig.predict(X_test_orig),rf_orig.predict(X_test_orig))}')
print(f'[bit RF, true y] train R2: {r2_score(ftd_bit.predict(X_train_bit),y_train)}, val R2: {r2_score(ftd_bit.predict(X_val_bit),y_val)}, test R2: {r2_score(ftd_bit.predict(X_test_bit),y_test)}')
print(f'[bit RF, RF y] train R2: {r2_score(ftd_bit.predict(X_train_bit),rf_bit.predict(X_train_bit))}, val R2: {r2_score(ftd_bit.predict(X_val_bit),rf_bit.predict(X_val_bit))}, test R2: {r2_score(ftd_bit.predict(X_test_bit),rf_bit.predict(X_test_bit))}')
print(f'[bin RF, true y] train R2: {r2_score(ftd_bin.predict(X_train_bin),y_train)}, val R2: {r2_score(ftd_bin.predict(X_val_bin),y_val)}, test R2: {r2_score(ftd_bin.predict(X_test_bin),y_test)}')
print(f'[bin RF, RF y] train R2: {r2_score(ftd_bin.predict(X_train_bin),rf_bin.predict(X_train_bin))}, val R2: {r2_score(ftd_bin.predict(X_val_bin),rf_bin.predict(X_val_bin))}, test R2: {r2_score(ftd_bin.predict(X_test_bin),rf_bin.predict(X_test_bin))}')

[orig FTD, true y] train R2: -7.885822041064005, val R2: -3.7111191148956344, test R2: -6.305033469166448
[orig FTD, RF y] train R2: 0.9996670307618889, val R2: 0.9715504681852587, test R2: 0.9712012880196302
[bit RF, true y] train R2: -6.622357480445224, val R2: -3.105317090567996, test R2: -6.161478760187267
[bit RF, RF y] train R2: 0.9926276284356037, val R2: 0.9640739743497594, test R2: 0.9473043219530951
[bin RF, true y] train R2: -6.962643878412718, val R2: -3.7379328982949005, test R2: -5.2486011952648255
[bin RF, RF y] train R2: 0.9997307835856288, val R2: 0.9743582426996714, test R2: 0.9736393281757373


In [22]:
# print(f'[bit RF, true y] train R2: {r2_score(ftd_bit.predict(X_train_bit),y_train)}, val R2: {r2_score(ftd_bit.predict(X_val_bit),y_val)}, test R2: {r2_score(ftd_bit.predict(X_test_bit),y_test)}')
# print(f'[bit RF, RF y] train R2: {r2_score(ftd_bit.predict(X_train_bit),rf_orig.predict(X_train))}, val R2: {r2_score(ftd_bit.predict(X_val_bit),rf_orig.predict(X_val))}, test R2: {r2_score(ftd_bit.predict(X_test_bit),rf_orig.predict(X_test))}')
# print(f'[bin RF, true y] train R2: {r2_score(ftd_bin.predict(X_train_bin),y_train)}, val R2: {r2_score(ftd_bin.predict(X_val_bin),y_val)}, test R2: {r2_score(ftd_bin.predict(X_test_bin),y_test)}')
# print(f'[bin RF, RF y] train R2: {r2_score(ftd_bin.predict(X_train_bin),rf_orig.predict(X_train))}, val R2: {r2_score(ftd_bin.predict(X_val_bin),rf_orig.predict(X_val))}, test R2: {r2_score(ftd_bin.predict(X_test_bin),rf_orig.predict(X_test))}')

In [23]:
from itertools import compress

In [24]:
sorted([i for i in zip(list(compress(ftd_orig.features, ftd_orig.regression_model.coef_ != 0)), list(compress(ftd_orig.regression_model.coef_, ftd_orig.regression_model.coef_ != 0)))], key = lambda x: abs(x[1]))[-3:]

[(('modified_Zurich_class_E', 'spot_distribution_C', 'historically-complex_2'),
  -0.03889982177256009),
 (('modified_Zurich_class_F', 'largest_spot_size_H', 'largest_spot_size_K'),
  0.046259868962048235),
 (('modified_Zurich_class_E',
   'modified_Zurich_class_F',
   'spot_distribution_C'),
  0.11737226073553388)]

In [25]:
sorted([i for i in zip(list(compress(ftd_bit.features, ftd_bit.regression_model.coef_ != 0)), list(compress(ftd_bit.regression_model.coef_, ftd_bit.regression_model.coef_ != 0)))], key = lambda x: abs(x[1]))[-3:]

[(('largest_spot_size_bit1', 'area', 'activity'), 0.05071841272080413),
 (('activity',), -0.08004708506466442),
 (('largest_spot_size_bit0', 'spot_distribution_bit2', 'evolution_leaf_bit0'),
  -0.09377090026009628)]

In [26]:
sorted([i for i in zip(list(compress(ftd_bin.features, ftd_bin.regression_model.coef_ != 0)), list(compress(ftd_bin.regression_model.coef_, ftd_bin.regression_model.coef_ != 0)))], key = lambda x: abs(x[1]))[-3:]

[(('spot_distribution_C', 'modified_Zurich_class_F', 'largest_spot_size_H'),
  0.06214869098295132),
 (('modified_Zurich_class_D', 'spot_distribution_C', 'largest_spot_size_S'),
  -0.0628736939361165),
 (('largest_spot_size_X', 'modified_Zurich_class_C', 'spot_distribution_X'),
  -0.12295648372984518)]