In [10]:
import pandas as pd
from datacleaner import autoclean
from concurrent.futures import ProcessPoolExecutor

from sklearn.metrics import classification_report, f1_score, accuracy_score
from sklearn.model_selection import train_test_split

from autosklearn.classification import AutoSklearnClassifier
from tpot import TPOTClassifier, TPOTRegressor
from auto_ml import Predictor
from pmlb import fetch_data

In [11]:
# Names of the datasets that each AutoML benchmark function is run against.
classification_datasets = [
    'allbp',
    'allhyper',
    'car',
    'hypothyroid',
]

In [14]:
def tpot(name):
    """Fit a TPOT classifier on one dataset and report its hold-out F1 score.

    The dataset is loaded with ``fetch_data`` (using ``../data/`` as the local
    cache directory), split 80/20 with a fixed seed, and a ``TPOTClassifier``
    is fitted on the training part. The score on the test part is printed and
    returned.

    Parameters
    ----------
    name : str
        Dataset name passed to ``fetch_data``.

    Returns
    -------
    float
        Binary F1 score when the target allows it, otherwise the
        micro-averaged F1 score.
    """
    X, y = fetch_data(name, return_X_y=True, local_cache_dir='../data/')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Seed the evolutionary search as well as the split so that repeated runs
    # of the notebook produce the same pipeline and score.
    model = TPOTClassifier(generations=3, population_size=50, verbosity=0, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    try:
        result = f1_score(y_test, y_pred)
    except ValueError:
        # f1_score defaults to average='binary' and raises ValueError for
        # multiclass targets; fall back to micro averaging in that case only,
        # letting any other error surface instead of being masked.
        result = f1_score(y_test, y_pred, average='micro')

    print(f'dataset: {name} - metric: f1 score - tpot result: {result}')
    return result

In [15]:
def autosklearn(name):
    """Fit an auto-sklearn classifier on one dataset and report its hold-out F1 score.

    The dataset is loaded with ``fetch_data`` (using ``../data/`` as the local
    cache directory), split 80/20 with a fixed seed, and an
    ``AutoSklearnClassifier`` with default settings is fitted on the training
    part. The score on the test part is printed and returned.

    Parameters
    ----------
    name : str
        Dataset name passed to ``fetch_data``.

    Returns
    -------
    float
        Binary F1 score when the target allows it, otherwise the
        micro-averaged F1 score.
    """
    X, y = fetch_data(name, return_X_y=True, local_cache_dir='../data/')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = AutoSklearnClassifier()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    try:
        result = f1_score(y_test, y_pred)
    except ValueError:
        # f1_score defaults to average='binary' and raises ValueError for
        # multiclass targets; fall back to micro averaging in that case only,
        # letting any other error surface instead of being masked.
        result = f1_score(y_test, y_pred, average='micro')

    print(f'dataset: {name} - metric: f1 score - autosklearn result: {result}')
    return result

In [16]:
# Run every benchmark on every dataset in worker processes. The futures are
# kept so that a failure inside a worker is reported instead of being dropped
# silently (a discarded Future hides any exception raised by its task).
with ProcessPoolExecutor(max_workers=3) as pool:
    submitted = {}
    for name in classification_datasets:
        for benchmark in (tpot, autosklearn):
            submitted[pool.submit(benchmark, name)] = (benchmark.__name__, name)

    # Future.exception() blocks until the task finishes and returns None on
    # success, so one failing task does not prevent the others being reported.
    for future, (tool, name) in submitted.items():
        error = future.exception()
        if error is not None:
            print(f'dataset: {name} - {tool} failed: {error!r}')

dataset: allhyper - metric: f1 score - tpot result: 0.9867549668874173
dataset: allbp - metric: f1 score - tpot result: 0.9721854304635762
dataset: car - metric: f1 score - tpot result: 0.9971098265895953
dataset: allbp - metric: f1 score - autosklearn result: 0.9735099337748344
dataset: hypothyroid - metric: f1 score - tpot result: 0.9875104079933389
dataset: allhyper - metric: f1 score - autosklearn result: 0.9867549668874173
dataset: car - metric: f1 score - autosklearn result: 0.9913294797687862
dataset: hypothyroid - metric: f1 score - autosklearn result: 0.9891576313594662
