In [1]:
from pathlib import Path

from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

from src.experiments.experiment import *
from src.experiments.ExperimentSupervised import *
from src.xtrees.ForestBasedTree import *

# Single random seed for the notebook: it is stored in the experiment
# parameters, passed to FitReg, and embedded in the results CSV filename.
SEED: int = 10

In [2]:
# Experiment-wide configuration handed to Experiment.
# 'data-params' starts empty; the run output reports it being populated with
# the generated datasets once the experiments are launched.
params = {
    'meta-params': {
        'is_classification': False,
        'train_test_split': 0.2,
        'random_state': SEED
    },
    'data-params': [],
    'model-params': {}
}

# Every model is seeded from the same SEED constant that is stored in
# params['meta-params']['random_state'], so there is one source of truth.
rf_reg = RandomForestRegressor(random_state=SEED, n_estimators=10, max_depth=5)
dtrand_reg = DecisionTreeRegressor(random_state=SEED)
fbt_reg = ForestBasedTree(random_state=SEED, verbose=False)

fitreg = FitReg(SEED)

# NOTE(review): the two lists are presumably matched by position (model i is
# fitted with fit function i) -- keep them in the same order; confirm against
# Experiment.perform_experiments.
model_instances = [
    rf_reg,
    dtrand_reg,
    fbt_reg,
]
fit_functions = [
    fitreg.fit_rf_reg,
    fitreg.fit_dtrand_reg,
    fitreg.fit_fbt_reg,
]

exp = Experiment(params)
exp.perform_experiments(num_datasets=10,
                        overall_size='mixed',
                        information='mixed',
                        prediction='mixed',
                        model_instances=model_instances,
                        fit_functions=fit_functions)

results_reg_df = exp.assemble_results_dataframe()

# Create the output directory first: the experiments above are slow, and a
# missing 'data/results' folder would otherwise raise only at write time and
# throw the finished results away.
results_path = Path(f'data/results/reg_experiment{SEED}.csv')
results_path.parent.mkdir(parents=True, exist_ok=True)
results_reg_df.to_csv(results_path)

Populated data-params with 10 datasets of overall size mixed, information level mixed, and prediction level mixed.

Dataset ID: 1
n_samples     | n_features    | n_informative | tail_strength | random_state 
7000          | 200           | 20            | 0.5000        | 10           

RandomForestRegressor
DecisionTreeRegressor
ForestBasedTree
Metric         RandomForestRegressor           | DecisionTreeRegressor           | ForestBasedTree                
experiment_id   | 1                               | 2                               | 3                              
mae             | 156.0324                        | 202.3546                        | 182.2798                       
mse             | 37768.2603                      | 63121.3967                      | 50669.2321                     
pred_time       | 0.002                           | 0.0007                          | 0.1304                         
r2              | 0.4903                          | 0.1482        

In [3]:
# Summarise the collected regression results with average_reg_metrics and
# print the summary as a Markdown table.
avg_df = average_reg_metrics(results_reg_df)
summary_markdown = avg_df.to_markdown()
print(summary_markdown)

|    | model_name            | train_time   | pred_time   | normalized_mae   | normalized_mse   | r2        |
|---:|:----------------------|:-------------|:------------|:-----------------|:-----------------|:----------|
|  0 | DecisionTreeRegressor | 0.28±0.62    | 0.0±0.0     | 0.93±0.1         | 0.89±0.18        | 0.04±0.29 |
|  1 | ForestBasedTree       | 29.98±37.15  | 0.07±0.1    | 0.83±0.22        | 0.71±0.3         | 0.21±0.41 |
|  2 | RandomForestRegressor | 0.91±1.44    | 0.0±0.0     | 0.69±0.19        | 0.51±0.24        | 0.44±0.32 |
