In [3]:
import pandas as pd 

from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from sklearn.model_selection import train_test_split

from supervised.automl import AutoML

from utils import load_train_data, load_train_labels, load_test_data

In [2]:

data = load_train_data()
labels = load_train_labels()

X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.33, random_state=42)

# train models with AutoML
automl = AutoML(mode="Compete", eval_metric='accuracy', total_time_limit=3600*6)
automl.fit(X_train, y_train.ravel())

# compute the accuracy on test data
predictions = automl.predict_all(X_test)
print(predictions.head())
print("Test accuracy:", balanced_accuracy_score(y_test, predictions["label"].astype(int)))

AutoML directory: AutoML_2
The task is binary_classification with evaluation metric accuracy
AutoML will use algorithms: ['Decision Tree', 'Linear', 'Random Forest', 'Extra Trees', 'LightGBM', 'Xgboost', 'CatBoost', 'Neural Network', 'Nearest Neighbors']
AutoML will stack models
AutoML will ensemble available models
AutoML steps: ['adjust_validation', 'simple_algorithms', 'default_algorithms', 'not_so_random', 'golden_features', 'insert_random_feature', 'features_selection', 'hill_climbing_1', 'hill_climbing_2', 'boost_on_errors', 'ensemble', 'stack', 'ensemble_stacked']
* Step adjust_validation will try to check up to 1 model
1_DecisionTree accuracy 0.701493 trained in 0.67 seconds
Adjust validation. Remove: 1_DecisionTree
Validation strategy: 10-fold CV Shuffle,Stratify
* Step simple_algorithms will try to check up to 4 models
1_DecisionTree accuracy 0.691791 trained in 3.29 seconds
2_DecisionTree accuracy 0.738806 trained in 3.55 seconds
3_DecisionTree accuracy 0.740299 trained in 3

Feature names unseen at fit time:
- 11_Xgboost_prediction
- 12_Xgboost_prediction
- 19_Xgboost_prediction
- 1_DecisionTree_prediction
- 21_LightGBM_prediction
- ...
Feature names seen at fit time, yet now missing:
- 11_Xgboost_prediction_0_for_-1_1_for_1
- 12_Xgboost_prediction_0_for_-1_1_for_1
- 19_Xgboost_prediction_0_for_-1_1_for_1
- 1_DecisionTree_prediction_0_for_-1_1_for_1
- 21_LightGBM_prediction_0_for_-1_1_for_1
- ...

Feature names unseen at fit time:
- 11_Xgboost_prediction
- 12_Xgboost_prediction
- 19_Xgboost_prediction
- 1_DecisionTree_prediction
- 21_LightGBM_prediction
- ...
Feature names seen at fit time, yet now missing:
- 11_Xgboost_prediction_0_for_-1_1_for_1
- 12_Xgboost_prediction_0_for_-1_1_for_1
- 19_Xgboost_prediction_0_for_-1_1_for_1
- 1_DecisionTree_prediction_0_for_-1_1_for_1
- 21_LightGBM_prediction_0_for_-1_1_for_1
- ...

Feature names unseen at fit time:
- 11_Xgboost_prediction
- 12_Xgboost_prediction
- 19_Xgboost_prediction
- 1_DecisionTree_prediction
- 21

   prediction_-1  prediction_1  label
0       0.002614      0.997386      1
1       0.969189      0.030811     -1
2       0.849194      0.150806     -1
3       0.973407      0.026593     -1
4       0.986324      0.013676     -1
Test accuracy: 0.8636497208345577


In [15]:
automl = AutoML(results_path="AutoML_2")
X_test = load_test_data()

y_pred_1 = automl.predict_proba(X_test)

Feature names unseen at fit time:
- 11_Xgboost_prediction
- 12_Xgboost_prediction
- 19_Xgboost_prediction
- 1_DecisionTree_prediction
- 21_LightGBM_prediction
- ...
Feature names seen at fit time, yet now missing:
- 11_Xgboost_prediction_0_for_-1_1_for_1
- 12_Xgboost_prediction_0_for_-1_1_for_1
- 19_Xgboost_prediction_0_for_-1_1_for_1
- 1_DecisionTree_prediction_0_for_-1_1_for_1
- 21_LightGBM_prediction_0_for_-1_1_for_1
- ...

Feature names unseen at fit time:
- 11_Xgboost_prediction
- 12_Xgboost_prediction
- 19_Xgboost_prediction
- 1_DecisionTree_prediction
- 21_LightGBM_prediction
- ...
Feature names seen at fit time, yet now missing:
- 11_Xgboost_prediction_0_for_-1_1_for_1
- 12_Xgboost_prediction_0_for_-1_1_for_1
- 19_Xgboost_prediction_0_for_-1_1_for_1
- 1_DecisionTree_prediction_0_for_-1_1_for_1
- 21_LightGBM_prediction_0_for_-1_1_for_1
- ...

Feature names unseen at fit time:
- 11_Xgboost_prediction
- 12_Xgboost_prediction
- 19_Xgboost_prediction
- 1_DecisionTree_prediction
- 21

In [19]:
with open('artificial_test.label', 'w') as f:
    for y_single in y_pred[:,1]:
        f.write(f'{y_single.item()}\n')