In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

from src.experiments.experiment import *
from src.experiments.ExperimentSupervised import *
from src.xtrees.ForestBasedTree import *
from src.experiments.exact_paper import PrevPaperClassifier, fit_paper_fbt

SEED = 5

In [None]:
params = {
    'meta-params': {
        'is_classification': True,
        'random_state': SEED,
        'use_cross_validation': True,
        'cv_folds': 3
    },
    'data-params': [],
    'model-params': {}
}

rf_class = RandomForestClassifier(random_state=params['meta-params']['random_state'], n_estimators=10, max_depth=5)
dtrand_class = DecisionTreeClassifier(random_state=params['meta-params']['random_state'])
fbt_class = ForestBasedTree(random_state=params['meta-params']['random_state'], verbose=False)
paper_class = PrevPaperClassifier()


fitclass = FitClass(SEED)

model_instances = [rf_class, dtrand_class, fbt_class, paper_class]
fit_functions = [fitclass.fit_rf_class, 
                 fitclass.fit_dtrand_class, 
                 fitclass.fit_fbt_class,
                 fit_paper_fbt]

exp2 = Experiment(params)
exp2.perform_experiments(num_datasets=1, 
                        overall_size='medium', 
                        information='mixed', 
                        prediction='mixed', 
                        model_instances=model_instances, 
                        fit_functions=fit_functions)

results_class_df = exp2.assemble_results_dataframe()

results_class_df.to_csv(f'data/results/class_experiment{SEED}.csv')

Populated data-params with 1 datasets of overall size medium, information level mixed, and prediction level mixed.

Dataset ID: 1
n_samples     | n_features    | n_informative | n_classes     | n_redundant   | n_repeated    | random_state 
1300          | 100           | 10            | 5             | 30            | 10            | 5            

RandomForestClassifier
Running cross-validation with 3 folds...
DecisionTreeClassifier
Running cross-validation with 3 folds...


  _data = np.array(data, dtype=dtype, copy=copy,


ForestBasedTree
Running cross-validation with 3 folds...
PrevPaperClassifier
Running cross-validation with 3 folds...
0.9196079503330863 [10, 19]
0.9395867890916267 [10, 19, 88]
0.9519958504232249 [10, 19, 88, 77]
0.9592797777469613 [10, 19, 88, 77, 1]
0.9644899167417822 [10, 19, 88, 77, 1, 87]
0.9685263202107857 [10, 19, 88, 77, 1, 87, 60]
0.9714733264351508 [10, 19, 88, 77, 1, 87, 60, 62]
0.9738162972227704 [10, 19, 88, 77, 1, 87, 60, 62, 53]
0.9758930857276961 [10, 19, 88, 77, 1, 87, 60, 62, 53, 94]
0.9773315034482023 [10, 19, 88, 77, 1, 87, 60, 62, 53, 94, 18]
0.9787504200246414 [10, 19, 88, 77, 1, 87, 60, 62, 53, 94, 18, 25]
0.9796534730037496 [10, 19, 88, 77, 1, 87, 60, 62, 53, 94, 18, 25, 14]
0.9803381798398838 [10, 19, 88, 77, 1, 87, 60, 62, 53, 94, 18, 25, 14, 23]
0.9809948850332553 [10, 19, 88, 77, 1, 87, 60, 62, 53, 94, 18, 25, 14, 23, 44]
0.9815199158350623 [10, 19, 88, 77, 1, 87, 60, 62, 53, 94, 18, 25, 14, 23, 44, 24]
0.982032612579938 [10, 19, 88, 77, 1, 87, 60, 62, 53, 

In [2]:

avg_df = average_class_metrics(results_class_df)
print(avg_df.to_markdown())

NameError: name 'results_class_df' is not defined