### This notebook loads the OpenML100 benchmark suite and runs the scikit-learn RandomForest classifier on each of its tasks. The task ID, dataset name, accuracy, and runtime of each task are compiled into .txt files for later analysis.

# Load in dataset

In [3]:
import os
import warnings
from datetime import datetime

import openml
import sklearn
import sklearn.pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings('ignore')

# Look up the 'OpenML100' suite through the openml client; the benchmark loop
# further down iterates over `benchmark_suite.tasks` to get the task IDs to run.
benchmark_suite = openml.study.get_suite('OpenML100')  # obtain the benchmark suite

# RandomForestClassifier

In [4]:
# Build the scikit-learn model: impute missing values (default strategy), then
# fit a random forest.
# `sklearn.preprocessing.Imputer` was removed in scikit-learn 0.22, so prefer its
# replacement `SimpleImputer` and only fall back to `Imputer` on old installs.
try:
    from sklearn.impute import SimpleImputer as MissingValueImputer
except ImportError:  # scikit-learn < 0.20 has no sklearn.impute module
    from sklearn.preprocessing import Imputer as MissingValueImputer

clf = sklearn.pipeline.make_pipeline(MissingValueImputer(),
                                     RandomForestClassifier())

# Credentials must not live in the notebook. Read the OpenML API key from the
# OPENML_APIKEY environment variable, once, before the loop. If the variable is
# unset, whatever key openml already has configured is left untouched.
# NOTE(review): a key was previously committed in this cell -- revoke it on openml.org.
api_key = os.environ.get('OPENML_APIKEY')
if api_key:
    openml.config.apikey = api_key

RESULTS_PATH = "sklearnRF_accuracies_100.txt"

for task_id in benchmark_suite.tasks:  # iterate over all tasks
    start_time = datetime.now()
    try:
        task = openml.tasks.get_task(task_id)  # download the OpenML task
        run = openml.runs.run_model_on_task(clf, task)  # run classifier on the task's splits
        score = run.get_metric_fn(sklearn.metrics.accuracy_score)  # per-split accuracies

        # Fetch the dataset name and the elapsed time once so that the printed
        # values and the logged values are identical.
        dataset_name = task.get_dataset().name
        mean_accuracy = score.mean()
        elapsed = datetime.now() - start_time

        print('Data set: %s; Accuracy: %0.2f' % (dataset_name, mean_accuracy))
        print('Time: ' + str(elapsed))

        # Append one CSV-style row per task; `with` closes the file even if the
        # write fails. The row format (including the trailing comma) is kept
        # unchanged for downstream analysis.
        with open(RESULTS_PATH, "a") as results_file:
            results_file.write('%i,%s,%0.4f,%s,\n'
                               % (task_id, dataset_name, mean_accuracy, str(elapsed)))
    except Exception as err:
        # Best effort: one failing task must not abort the whole benchmark, but
        # report why it failed. `Exception` (not a bare except) lets Ctrl-C /
        # kernel interrupt stop the loop.
        print('Error in task %s: %r' % (task_id, err))

Data set: kr-vs-kp; Accuracy: 0.99
Time: 0:00:01.596959
Data set: letter; Accuracy: 0.94
Time: 0:00:02.926510
Data set: balance-scale; Accuracy: 0.82
Time: 0:00:00.759715
Data set: mfeat-factors; Accuracy: 0.95
Time: 0:00:02.035574
Data set: mfeat-fourier; Accuracy: 0.80
Time: 0:00:01.658692
Data set: breast-w; Accuracy: 0.96
Time: 0:00:00.716054
Data set: mfeat-karhunen; Accuracy: 0.91
Time: 0:00:01.613347
Data set: mfeat-morphological; Accuracy: 0.69
Time: 0:00:00.823982
Data set: mfeat-pixel; Accuracy: 0.96
Time: 0:00:05.806789
Data set: car; Accuracy: 0.97
Time: 0:00:00.856283
Data set: mfeat-zernike; Accuracy: 0.75
Time: 0:00:01.507111
Data set: cmc; Accuracy: 0.50
Time: 0:00:00.963083
Data set: mushroom; Accuracy: 1.00
Time: 0:00:01.388118
Data set: optdigits; Accuracy: 0.97
Time: 0:00:01.575346
Data set: credit-approval; Accuracy: 0.87
Time: 0:00:00.927174
Data set: credit-g; Accuracy: 0.75
Time: 0:00:01.032303
Data set: pendigits; Accuracy: 0.99
Time: 0:00:01.988456
Data set: s