### This notebook loads the OpenML-CC18 benchmark suite and classifies each dataset with scikit-learn's `RandomForestClassifier`. For every task, the task ID, dataset name, mean accuracy, and runtime are appended as a comma-separated row to a .txt file (`sklearnRF_accuracies_CC-18.txt`) for later analysis.

# Load the OpenML-CC18 benchmark suite

In [8]:
import os
import warnings
from datetime import datetime

import openml
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier

# Install a global "ignore" filter so no warning category is shown.
warnings.simplefilter('ignore')

# Fetch the benchmark suite definition; its `.tasks` are iterated further down.
SUITE_NAME = 'OpenML-CC18'
benchmark_suite = openml.study.get_suite(SUITE_NAME)

# RandomForestClassifier

In [9]:
# Evaluate a scikit-learn random forest on every task of the benchmark suite.
#
# For each task this cell downloads the task, runs the pipeline on it through
# OpenML, prints the dataset name, mean accuracy and elapsed wall-clock time,
# and appends one row to RESULTS_PATH in the form
#     task_id,dataset_name,mean_accuracy,elapsed,
# (the trailing comma is kept so previously written rows stay consistent).

RESULTS_PATH = "sklearnRF_accuracies_CC-18.txt"

# Credentials must not live in the notebook. The key is read from the
# OPENML_APIKEY environment variable (name chosen for this notebook); when it
# is unset, whatever key openml has already loaded from its own configuration
# is left untouched.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked/regenerated on openml.org.
api_key = os.environ.get("OPENML_APIKEY")
if api_key:
    openml.config.apikey = api_key

# Build the scikit-learn classifier: impute missing values, then fit a forest.
# SimpleImputer replaces sklearn.preprocessing.Imputer, which was deprecated in
# scikit-learn 0.20 and removed in 0.22; both default to mean imputation.
clf = make_pipeline(SimpleImputer(), RandomForestClassifier())

for task_id in benchmark_suite.tasks:  # iterate over all tasks
    start_time = datetime.now()

    task = openml.tasks.get_task(task_id)  # download the OpenML task
    # Run the classifier on the task (the original note says this step
    # requires an API key).
    run = openml.runs.run_model_on_task(clf, task)
    score = run.get_metric_fn(accuracy_score)  # accuracy values; averaged below

    # Measure once so the printed and the logged duration are the same value.
    elapsed = datetime.now() - start_time
    # Look the dataset up once instead of once per output line.
    dataset_name = task.get_dataset().name
    mean_accuracy = score.mean()

    print('Data set: %s; Accuracy: %0.4f' % (dataset_name, mean_accuracy))
    print('Time: ' + str(elapsed))

    # Append per task so finished results survive a failure on a later task;
    # the context manager guarantees the handle is closed even on error.
    with open(RESULTS_PATH, "a") as results_file:
        results_file.write('%i,%s,%0.4f,%s,\n' % (task_id, dataset_name, mean_accuracy, str(elapsed)))

Data set: kr-vs-kp; Accuracy: 0.9828
Time: 0:00:01.648604
Data set: letter; Accuracy: 0.9401
Time: 0:00:03.120219
Data set: balance-scale; Accuracy: 0.8271
Time: 0:00:00.717325
Data set: mfeat-factors; Accuracy: 0.9460
Time: 0:00:01.910577
Data set: mfeat-fourier; Accuracy: 0.8040
Time: 0:00:01.683703
Data set: breast-w; Accuracy: 0.9613
Time: 0:00:00.714478
Data set: mfeat-karhunen; Accuracy: 0.9065
Time: 0:00:01.603372
Data set: mfeat-morphological; Accuracy: 0.6950
Time: 0:00:00.854322
Data set: mfeat-zernike; Accuracy: 0.7595
Time: 0:00:01.463599
Data set: cmc; Accuracy: 0.5146
Time: 0:00:00.948737
Data set: optdigits; Accuracy: 0.9648
Time: 0:00:01.647045
Data set: credit-approval; Accuracy: 0.8652
Time: 0:00:00.920062
Data set: credit-g; Accuracy: 0.7330
Time: 0:00:01.052426
Data set: pendigits; Accuracy: 0.9868
Time: 0:00:01.884745
Data set: diabetes; Accuracy: 0.7487
Time: 0:00:00.731393
Data set: spambase; Accuracy: 0.9470
Time: 0:00:01.357894
Data set: splice; Accuracy: 0.927