In [10]:
import pandas as pd
import test_on_representative_sample as tors
import threadpoolctl

# Cap the thread pools controlled by threadpoolctl at a single thread.
# NOTE(review): this is invoked as a plain call rather than a `with` block, so
# the limit presumably stays in effect for the rest of the session — confirm
# that is the intent (e.g. to keep the per-estimator timings comparable).
threadpoolctl.threadpool_limits(limits=1)

from sklearn import utils
from inspect import signature

# Every scikit-learn regressor, as (name, class) pairs.
ests = utils.all_estimators(type_filter='regressor')

# Settings shared by every paired test in the sweep.
paired_test_kwargs = dict(
    n_features=10,
    test='welch',
    max_seed=20,
    train_size=200,
    n_samples_per_cv_group=1000,
    rep_test_size=5,
    random_rejection_level=0.1,
    max_repeats=3,
    correct_threshold=True,
    equal_var=False,
)

# One entry per estimator for which the paired test completed.
rows = []

for _name, est in ests:
    # Only estimators that can be seeded are of interest ...
    if 'random_state' not in signature(est).parameters:
        continue
    # ... and whose fit() accepts per-sample weights.
    if 'sample_weight' not in signature(est.fit).parameters:
        continue

    try:
        outcome = tors.paired_test(est, **paired_test_kwargs)
    except ValueError:
        # The estimator is reported and left out of the results table.
        print(est, "with different random state led to the same predictions")
    else:
        rows.append(outcome)

# Tabulate the collected results; the sorted view is the cell's displayed
# value (sort_values returns a new frame, so `df` itself stays unsorted).
df = pd.DataFrame(rows)
df.sort_values(by='min_p_value')



100%|██████████| 20/20 [00:02<00:00,  9.12it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'> in ---- 2.2554209232330322 s---
Average difference in medians is: -0.5460646264199213
Minimum p-values:  0.09609265786876406


100%|██████████| 20/20 [00:00<00:00, 21.34it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.ensemble._bagging.BaggingRegressor'> in ---- 0.963310956954956 s---
Average difference in medians is: -27.928213463454988
Minimum p-values:  0.008513687118345398


100%|██████████| 20/20 [00:00<00:00, 294.94it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.tree._classes.DecisionTreeRegressor'> in ---- 0.07504987716674805 s---
Average difference in medians is: 35.45191836248365
Minimum p-values:  0.3184691333120901


100%|██████████| 20/20 [00:00<00:00, 1137.79it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.linear_model._coordinate_descent.ElasticNet'> in ---- 0.023733854293823242 s---
Average difference in medians is: 1.5631940186722205e-14
Minimum p-values:  0.9999999999622764


100%|██████████| 20/20 [00:00<00:00, 53.73it/s]


<class 'sklearn.linear_model._coordinate_descent.ElasticNetCV'> with different random state led to the same predictions


100%|██████████| 20/20 [00:00<00:00, 551.43it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.tree._classes.ExtraTreeRegressor'> in ---- 0.04305100440979004 s---
Average difference in medians is: 30.495626065126846
Minimum p-values:  0.08628168607377737


100%|██████████| 20/20 [00:03<00:00,  5.91it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.ensemble._forest.ExtraTreesRegressor'> in ---- 3.4791250228881836 s---
Average difference in medians is: 6.71225913727248
Minimum p-values:  0.005919999389758984


100%|██████████| 20/20 [00:02<00:00,  8.63it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.ensemble._gb.GradientBoostingRegressor'> in ---- 2.3727619647979736 s---
Average difference in medians is: 3.5419571700995105
Minimum p-values:  0.37905714987843253


100%|██████████| 20/20 [00:06<00:00,  3.09it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.ensemble._hist_gradient_boosting.gradient_boosting.HistGradientBoostingRegressor'> in ---- 6.648483991622925 s---
Average difference in medians is: 6.100866686620196
Minimum p-values:  0.003312970290864194


100%|██████████| 20/20 [00:00<00:00, 1254.47it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.linear_model._coordinate_descent.Lasso'> in ---- 0.02173900604248047 s---
Average difference in medians is: -1.0302869668521453e-13
Minimum p-values:  0.9999999998960611


100%|██████████| 20/20 [00:00<00:00, 48.25it/s]


<class 'sklearn.linear_model._coordinate_descent.LassoCV'> with different random state led to the same predictions


100%|██████████| 20/20 [00:00<00:00, 1297.52it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.svm._classes.LinearSVR'> in ---- 0.021360158920288086 s---
Average difference in medians is: -0.05246849469306199
Minimum p-values:  0.33046081220803225


100%|██████████| 20/20 [00:00<00:00, 29.79it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.linear_model._ransac.RANSACRegressor'> in ---- 0.6958708763122559 s---
Average difference in medians is: 13.782583328617045
Minimum p-values:  0.10083091787109318


100%|██████████| 20/20 [00:04<00:00,  4.21it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.ensemble._forest.RandomForestRegressor'> in ---- 4.858827114105225 s---
Average difference in medians is: 5.327095744289369
Minimum p-values:  0.009933887074082371


100%|██████████| 20/20 [00:00<00:00, 130.51it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.linear_model._ridge.Ridge'> in ---- 0.1690688133239746 s---
Average difference in medians is: 0.002782941311837561
Minimum p-values:  0.37568642602985114


100%|██████████| 20/20 [00:00<00:00, 686.44it/s]


Finished looping till the maximum random state, 20 for estimator <class 'sklearn.linear_model._stochastic_gradient.SGDRegressor'> in ---- 0.036041975021362305 s---
Average difference in medians is: 1.0578352549733707
Minimum p-values:  0.06927330386146749


Unnamed: 0,Name,p_values,min_p_value,avg_p_value
7,HistGradientBoostingRegressor,"[0.005694429104387728, 0.8227848974874117, 0.0...",0.003313,0.214009
5,ExtraTreesRegressor,"[0.6366380334124973, 0.005919999389758984, 0.1...",0.00592,0.412829
1,BaggingRegressor,"[0.13540856842602533, 0.8587031071819282, 0.80...",0.008514,0.482298
11,RandomForestRegressor,"[0.046507275966178666, 0.38495241637529587, 0....",0.009934,0.28607
13,SGDRegressor,"[0.5796119636647556, 0.8941347613654888, 0.310...",0.069273,0.395158
4,ExtraTreeRegressor,"[0.8330245825956623, 0.7485380970212634, 0.935...",0.086282,0.574012
0,AdaBoostRegressor,"[0.4712220335452597, 0.8041528076057658, 0.096...",0.096093,0.553925
10,RANSACRegressor,"[0.10083091787109318, 0.1426168440707225, 0.62...",0.100831,0.379464
2,DecisionTreeRegressor,"[0.4248415824836401, 0.628051623619422, 0.3908...",0.318469,0.506147
9,LinearSVR,"[0.7518051575700552, 0.9998116667990791, 0.865...",0.330461,0.685188
