# Benchmarking classification models with MLBenchmarks

## Installation

In [1]:
!pip install git+https://github.com/rcpsilva/MLBenchmarks@main

  Running command git clone --filter=blob:none --quiet https://github.com/rcpsilva/MLBenchmarks 'C:\Users\rcpsi\AppData\Local\Temp\pip-req-build-a3zv6crj'


Collecting git+https://github.com/rcpsilva/MLBenchmarks@main
  Cloning https://github.com/rcpsilva/MLBenchmarks (to revision main) to c:\users\rcpsi\appdata\local\temp\pip-req-build-a3zv6crj
  Resolved https://github.com/rcpsilva/MLBenchmarks to commit 932f7f77785d061bed8489a0d410cc104c612298
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'


## Imports

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures
from MLBenchmarks.benchmarking_methods  import load_classification_datasets, run_cross_dataset_benchmark_models
import warnings
warnings.filterwarnings("ignore")

## Load datasets

In [3]:
# Load all the available classification datasets
datasets = load_classification_datasets()

Running load_dry_bean ...
Running load_mushroom ...
Running load_spambase ...
Running load_student_dropout ...
Running load_wine ...


## Define models and pipelines

In [4]:
# Seed shared by every estimator that accepts `random_state`, so repeated runs
# of the benchmark build identically configured models.
RANDOM_STATE = 42

# Benchmark pipeline: a FeatureUnion of a 5-component PCA and a degree-2
# polynomial expansion, followed by a decision tree classifier.
# NOTE(review): the variable name (`..._linear_rf`) and the step name
# 'regressor' are misleading -- the final estimator is a DecisionTreeClassifier.
# Both are kept unchanged so any existing references keep working.
pipeline_linear_rf = Pipeline([
    ('feature_extraction', FeatureUnion([
        ('pca', PCA(n_components=5, random_state=RANDOM_STATE)),
        ('polynomial_features', PolynomialFeatures(degree=2)),
    ])),
    ('regressor', DecisionTreeClassifier(random_state=RANDOM_STATE))
])

# Collect the pipeline and the stand-alone models in a dictionary keyed by
# the display name used in the results.
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree Classifier": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Pipeline (Linear + Decision Tree)": pipeline_linear_rf
}


## Define Evaluation Metrics

In [5]:
# Metrics to evaluate for every model/dataset pair (accepts scikit-learn metrics)
metrics = [
    'accuracy',
    'f1_weighted',
]

## Run experiment

In [6]:
# JSON path handed to the benchmark runner.
# NOTE(review): 'clasification' is misspelled; the name is left untouched in
# case other tooling expects this exact file.
output_json = 'clasification_benchmarks.json'

# Evaluate every model on every dataset with the selected metrics, using
# 5-fold cross-validation. The later cells read `res` as a nested mapping:
# model name -> dataset name -> metric name -> values.
res = run_cross_dataset_benchmark_models(
    models,
    datasets,
    metrics,
    output_json,
    cv=5,
)

100%|██████████| 5/5 [00:03<00:00,  1.55it/s]
100%|██████████| 5/5 [00:04<00:00,  1.04it/s]
100%|██████████| 5/5 [01:21<00:00, 16.29s/it]
100%|██████████| 3/3 [01:29<00:00, 29.83s/it]


## Print results

In [7]:
import numpy as np

### Print in natural order 

In [8]:
# Walk the nested results (model -> dataset -> metric) in iteration order and
# report every metric as "mean +- std" of its recorded values.
for model, by_dataset in res.items():
    print(f'{model}')
    for dataset, by_metric in by_dataset.items():
        print(f'\t{dataset}')
        for metric, results in by_metric.items():
            avg = np.mean(results)
            spread = np.std(results)
            print(f'\t\t{metric}: {avg:.3f} +- {spread:.3f}')

Logistic Regression
	load_dry_bean
		fit_time: 0.300 +- 0.112
		score_time: 0.003 +- 0.000
		test_accuracy: 0.629 +- 0.193
		test_f1_weighted: 0.601 +- 0.223
		memory_usage(MB): 0.000 +- 0.000
	load_mushroom
		fit_time: 0.058 +- 0.008
		score_time: 0.003 +- 0.000
		test_accuracy: 0.840 +- 0.146
		test_f1_weighted: 0.836 +- 0.149
		memory_usage(MB): 0.000 +- 0.000
	load_spambase
		fit_time: 0.062 +- 0.005
		score_time: 0.002 +- 0.001
		test_accuracy: 0.905 +- 0.036
		test_f1_weighted: 0.905 +- 0.036
		memory_usage(MB): 0.000 +- 0.000
	load_student_dropout
		fit_time: 0.088 +- 0.006
		score_time: 0.002 +- 0.000
		test_accuracy: 0.680 +- 0.015
		test_f1_weighted: 0.628 +- 0.013
		memory_usage(MB): 0.000 +- 0.000
	load_wine
		fit_time: 0.016 +- 0.002
		score_time: 0.002 +- 0.001
		test_accuracy: 0.956 +- 0.042
		test_f1_weighted: 0.955 +- 0.042
		memory_usage(MB): 0.000 +- 0.000
Decision Tree Classifier
	load_dry_bean
		fit_time: 0.530 +- 0.058
		score_time: 0.003 +- 0.000
		test_accuracy:

### Compare results in each dataset

In [9]:
# Names of the benchmarked models, taken from the top-level keys of the results.
# NOTE(review): this rebinds `models` from the estimator dictionary to a list
# of names, so the benchmark cell must not be re-run after this one without
# re-running the model-definition cell first.
models = list(res)
models

['Logistic Regression',
 'Decision Tree Classifier',
 'Pipeline (Linear + Decision Tree)']

In [11]:
# Recover the model, dataset and metric names from the nested results
# (model -> dataset -> metric). The first model / first dataset are used as the
# reference for the available dataset and metric names.
# NOTE(review): `models`, `datasets` and `metrics` are rebound here from the
# original estimator dict / loaded datasets / scorer list to lists of names.
models = list(res.keys())
datasets = list(res[models[0]].keys())
metrics = list(res[models[0]][datasets[0]].keys())

print(models)
print(datasets)
print(metrics)

# Pick the metric to compare by name rather than by its position in `metrics`,
# so a change in the number or order of recorded metrics cannot silently
# switch the reported metric; a missing metric now raises a KeyError instead.
selected_metrics = ['test_f1_weighted']

# For each dataset, print mean +- std of the selected metric for every model,
# with model names right-aligned so the numbers line up.
for dataset in datasets:
    print(f'{dataset}')
    for metric in selected_metrics:
        print(f'\t{metric}')
        for model in models:
            scores = res[model][dataset][metric]
            print(f'\t\t{model:>35}:\t {np.mean(scores):.3f} \t +- {np.std(scores):.3f}')

['Logistic Regression', 'Decision Tree Classifier', 'Pipeline (Linear + Decision Tree)']
['load_dry_bean', 'load_mushroom', 'load_spambase', 'load_student_dropout', 'load_wine']
['fit_time', 'score_time', 'test_accuracy', 'test_f1_weighted', 'memory_usage(MB)']
load_dry_bean
	test_f1_weighted
		                Logistic Regression:	 0.601 	 +- 0.223
		           Decision Tree Classifier:	 0.504 	 +- 0.180
		  Pipeline (Linear + Decision Tree):	 0.567 	 +- 0.158
load_mushroom
	test_f1_weighted
		                Logistic Regression:	 0.836 	 +- 0.149
		           Decision Tree Classifier:	 0.915 	 +- 0.118
		  Pipeline (Linear + Decision Tree):	 0.889 	 +- 0.152
load_spambase
	test_f1_weighted
		                Logistic Regression:	 0.905 	 +- 0.036
		           Decision Tree Classifier:	 0.886 	 +- 0.052
		  Pipeline (Linear + Decision Tree):	 0.885 	 +- 0.045
load_student_dropout
	test_f1_weighted
		                Logistic Regression:	 0.628 	 +- 0.013
		           Decision Tree Classi