In [1]:
import pandas as pd 
# scikit-learn utilities
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# mljar-supervised package
from supervised.automl import AutoML

In [2]:
# Download the CSV resource from healthdata.gov into a DataFrame.
csp = pd.read_csv(
    'https://healthdata.gov/resource/nqtp-eetp.csv'
)
# Bare trailing expression so the notebook renders the frame as a table.
csp

Unnamed: 0,staff_involved,acf,acl,ahrq,aspr,cdc,cms,fda,hrsa,ihs,nih,os,samhsa,total
0,Staff normally paid from or shifted to adminis...,1.0,0.0,0.0,0.0,75.0,1662.0,0.0,792.0,61.0,0.0,1655.0,0.0,4246.0
1,Staff normally paid from or shifted to carryov...,639.0,8.0,13.0,428.0,2303.0,525.0,11714.0,523.0,258.0,230.0,1717.0,0.0,18358.0
2,Staff normally paid from or shifted to reimbur...,0.0,2.0,0.0,0.0,137.0,0.0,67.0,0.0,11766.0,0.0,19.0,0.0,11991.0
3,Commissioned Corps (excepted) /1,5.0,0.0,5.0,118.0,643.0,103.0,245.0,73.0,1524.0,193.0,228.0,36.0,3173.0
4,HHS officers appointed by the President (exempt),1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,9.0,1.0,15.0
5,Activities required to ensure that fully funde...,55.0,0.0,0.0,0.0,669.0,125.0,268.0,69.0,415.0,104.0,217.0,0.0,1922.0
6,Law enforcement activities.,0.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,68.0,0.0,0.0,79.0
7,Orderly phase-down and suspension of operations,19.0,12.0,6.0,3.0,337.0,100.0,10.0,66.0,37.0,255.0,173.0,27.0,1045.0
8,Other,0.0,0.0,0.0,1.0,42.0,0.0,0.0,0.0,0.0,95.0,79.0,0.0,217.0
9,Staff to be furloughed.,719.0,157.0,252.0,0.0,7693.0,3297.0,3595.0,1282.0,554.0,14687.0,2264.0,574.0,35074.0


In [3]:
# Load scikit-learn's digits dataset.
# NOTE: this rebinds `csp`; the DataFrame read from the CSV in the earlier
# cell is no longer reachable under this name after this line runs.
csp = load_digits()

# Stratified 75/25 split with a fixed seed, so re-running yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    pd.DataFrame(csp.data),
    csp.target,
    test_size=0.25,
    stratify=csp.target,
    random_state=123,
)

# Fit AutoML on the training partition only.
automl = AutoML(mode="Perform")
automl.fit(X_train, y_train)

# Score the held-out partition: preview the prediction frame, then compare
# its "label" column (cast to int) against the true labels.
predictions = automl.predict_all(X_test)
print(predictions.head())
predicted_labels = predictions["label"].astype(int)
test_accuracy = accuracy_score(y_test, predicted_labels)
print("Test accuracy:", test_accuracy)

AutoML directory: AutoML_1
The task is multiclass_classification with evaluation metric logloss
AutoML will use algorithms: ['Linear', 'Random Forest', 'LightGBM', 'Xgboost', 'Neural Network']
AutoML will ensemble available models
AutoML steps: ['simple_algorithms', 'default_algorithms', 'not_so_random', 'golden_features', 'insert_random_feature', 'features_selection', 'hill_climbing_1', 'hill_climbing_2', 'ensemble']
* Step simple_algorithms will try to check up to 1 model
1_Linear logloss 0.11709 trained in 14.6 seconds (1-sample predict time 0.0624 seconds)
* Step default_algorithms will try to check up to 4 models
2_Default_LightGBM logloss 0.119508 trained in 122.49 seconds (1-sample predict time 0.1152 seconds)
3_Default_Xgboost logloss 0.141544 trained in 55.13 seconds (1-sample predict time 0.1132 seconds)
4_Default_NeuralNetwork logloss 0.244656 trained in 4.1 seconds (1-sample predict time 0.0588 seconds)
5_Default_RandomForest logloss 0.835827 trained in 17.71 seconds (1-sam

In [4]:
# Display the held-out feature rows produced by train_test_split.
X_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.0,0.0,2.0,10.0,14.0,10.0,0.0,0.0,0.0,1.0,...,9.0,0.0,0.0,0.0,1.0,10.0,15.0,16.0,6.0,0.0
1,0.0,0.0,15.0,13.0,0.0,3.0,3.0,0.0,0.0,0.0,...,3.0,0.0,0.0,1.0,12.0,16.0,15.0,7.0,0.0,0.0
2,0.0,0.0,8.0,12.0,12.0,14.0,3.0,0.0,0.0,0.0,...,2.0,0.0,0.0,1.0,11.0,14.0,15.0,3.0,0.0,0.0
3,0.0,0.0,15.0,15.0,2.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,0.0,0.0
4,0.0,0.0,4.0,11.0,16.0,16.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,5.0,12.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
445,0.0,1.0,12.0,16.0,13.0,2.0,0.0,0.0,0.0,5.0,...,7.0,0.0,0.0,0.0,16.0,16.0,15.0,8.0,1.0,0.0
446,0.0,0.0,7.0,16.0,12.0,1.0,0.0,0.0,0.0,0.0,...,14.0,0.0,0.0,0.0,7.0,16.0,16.0,16.0,4.0,0.0
447,0.0,6.0,13.0,5.0,8.0,8.0,1.0,0.0,0.0,8.0,...,0.0,0.0,0.0,4.0,16.0,15.0,3.0,0.0,0.0,0.0
448,0.0,0.0,1.0,10.0,15.0,11.0,7.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,12.0,16.0,2.0,0.0,0.0


In [5]:
# Display the true target labels for the held-out split.
y_test


array([9, 8, 3, 7, 7, 8, 1, 9, 2, 2, 1, 8, 5, 3, 2, 3, 8, 0, 1, 0, 3, 1,
       4, 0, 6, 0, 9, 9, 8, 9, 7, 3, 2, 2, 4, 7, 1, 3, 7, 4, 5, 3, 2, 3,
       2, 8, 1, 3, 3, 6, 1, 3, 2, 9, 2, 6, 9, 3, 2, 6, 1, 7, 6, 2, 2, 1,
       0, 8, 7, 8, 6, 8, 7, 1, 9, 2, 5, 7, 1, 5, 0, 0, 9, 2, 2, 5, 0, 5,
       5, 4, 3, 0, 6, 1, 9, 6, 7, 4, 2, 7, 5, 0, 7, 0, 7, 3, 8, 0, 7, 9,
       4, 8, 6, 7, 5, 2, 3, 3, 9, 6, 2, 8, 9, 6, 4, 2, 3, 7, 5, 4, 7, 0,
       7, 1, 0, 7, 3, 5, 4, 3, 7, 0, 6, 3, 5, 5, 5, 1, 9, 2, 9, 2, 5, 1,
       4, 3, 4, 6, 9, 6, 1, 0, 1, 4, 5, 5, 0, 4, 9, 8, 0, 3, 8, 4, 6, 0,
       5, 2, 3, 0, 2, 0, 1, 8, 4, 8, 1, 0, 9, 5, 3, 3, 3, 4, 3, 9, 9, 8,
       8, 6, 7, 4, 6, 7, 0, 8, 6, 9, 7, 1, 4, 1, 5, 0, 5, 6, 0, 3, 8, 2,
       5, 7, 9, 0, 4, 0, 4, 4, 0, 1, 1, 9, 2, 3, 2, 9, 2, 8, 7, 5, 1, 5,
       4, 1, 3, 7, 7, 4, 4, 9, 4, 8, 8, 0, 5, 5, 3, 0, 2, 0, 9, 7, 5, 3,
       2, 0, 6, 0, 6, 5, 9, 8, 9, 2, 1, 7, 8, 0, 9, 0, 0, 0, 8, 8, 2, 7,
       7, 6, 1, 1, 9, 9, 3, 1, 3, 5, 0, 1, 6, 8, 3,