In [1]:
import os
import warnings

warnings.filterwarnings('ignore')

import autosklearn.classification
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from datetime import datetime
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Notebook-wide configuration: pandas display width, the shared random seed,
# and the locations this run reads from and writes to.
pd.options.display.max_columns = 999
RANDOM_STATE = 42


# Every run writes into its own timestamped directory under 'results'.
# The month directive is lowercase %m; uppercase %M means minutes, so using it
# in the date part would yield "year-minute-day-..." names that do not sort
# chronologically.
MODEL_DIR = os.path.join(
    'results',
    f"automl-single-model-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
)

# exist_ok=True keeps the cell re-runnable if it is executed twice within the
# same second (the timestamp only has one-second resolution).
os.makedirs(MODEL_DIR, exist_ok=True)

MODEL_PATH = os.path.join(MODEL_DIR, 'model.joblib')
DATA_PATH = 'data/final_train.csv'

In [1]:
from autosklearn.experimental.askl2 import AutoSklearn2Classifier

  self.re = re.compile(self.reString)


In [2]:
# Load the prepared dataset; the first CSV column is used as the index.
df = pd.read_csv(DATA_PATH, index_col=0)

# 'Activity' is the label. It is removed from the feature frame together with
# the 'void()' and 'subject' columns.
y = df['Activity']
x = df.drop(columns=['Activity', 'void()', 'subject'])

# Hold out part of the data for evaluation, seeded for a repeatable split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=RANDOM_STATE,
)

# Time budget is 8 * 60 * 60 (presumably seconds, i.e. eight hours -- confirm
# against the auto-sklearn docs); ensemble_size=1 asks for a single model.
automl = AutoSklearn2Classifier(
    time_left_for_this_task=8 * 60 * 60,
    ensemble_size=1,
    n_jobs=-1,
)
automl.fit(x_train, y_train)

# Save the fitted object to MODEL_PATH.
joblib.dump(automl, MODEL_PATH)

# Predictions for both splits; the reporting cells below read these names.
y_train_hat = automl.predict(x_train)
y_test_hat = automl.predict(x_test)

In [3]:
# Per-class metrics and the confusion matrix for the training split,
# followed by a 40-character separator line.
print(
    'Train results',
    classification_report(y_train, y_train_hat),
    confusion_matrix(y_train, y_train_hat),
    '-' * 40,
    sep='\n',
)

Train results
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00      1650
           SITTING       0.99      0.99      0.99      1479
          STANDING       1.00      0.99      0.99      1500
           WALKING       1.00      1.00      1.00      1417
WALKING_DOWNSTAIRS       1.00      1.00      1.00      1047
  WALKING_UPSTAIRS       1.00      1.00      1.00      1221

          accuracy                           1.00      8314
         macro avg       1.00      1.00      1.00      8314
      weighted avg       1.00      1.00      1.00      8314

[[1650    0    0    0    0    0]
 [   0 1471    7    0    0    1]
 [   0   17 1483    0    0    0]
 [   0    0    0 1417    0    0]
 [   0    0    0    0 1047    0]
 [   0    0    0    0    0 1221]]
----------------------------------------


In [4]:
# Per-class metrics and the confusion matrix for the held-out split,
# followed by a 40-character separator line.
print(
    'Test results',
    classification_report(y_test, y_test_hat),
    confusion_matrix(y_test, y_test_hat),
    '-' * 40,
    sep='\n',
)

Test results
                    precision    recall  f1-score   support

            LAYING       1.00      1.00      1.00       517
           SITTING       0.98      0.97      0.97       540
          STANDING       0.96      0.97      0.97       479
           WALKING       1.00      1.00      1.00       483
WALKING_DOWNSTAIRS       0.99      1.00      0.99       341
  WALKING_UPSTAIRS       1.00      0.99      1.00       412

          accuracy                           0.99      2772
         macro avg       0.99      0.99      0.99      2772
      weighted avg       0.99      0.99      0.99      2772

[[517   0   0   0   0   0]
 [  0 523  17   0   0   0]
 [  0  13 466   0   0   0]
 [  0   0   0 483   0   0]
 [  0   0   0   1 340   0]
 [  0   0   0   0   4 408]]
----------------------------------------


In [5]:
# Display the fitted result as a list of (weight, pipeline) pairs so the
# selected pipeline and its hyperparameters can be inspected.
automl.get_models_with_weights()

[(1.0,
  SimpleClassificationPipeline({'balancing:strategy': 'none', 'classifier:__choice__': 'libsvm_svc', 'data_preprocessing:categorical_transformer:categorical_encoding:__choice__': 'no_encoding', 'data_preprocessing:categorical_transformer:category_coalescence:__choice__': 'minority_coalescer', 'data_preprocessing:numerical_transformer:imputation:strategy': 'median', 'data_preprocessing:numerical_transformer:rescaling:__choice__': 'minmax', 'feature_preprocessor:__choice__': 'no_preprocessing', 'classifier:libsvm_svc:C': 12923.91167672086, 'classifier:libsvm_svc:gamma': 0.028106748647672205, 'classifier:libsvm_svc:kernel': 'rbf', 'classifier:libsvm_svc:max_iter': -1, 'classifier:libsvm_svc:shrinking': 'False', 'classifier:libsvm_svc:tol': 2.0706197108771777e-05, 'data_preprocessing:categorical_transformer:category_coalescence:minority_coalescer:minimum_fraction': 0.03972767277005085},
  dataset_properties={
    'task': 2,
    'sparse': False,
    'multilabel': False,
    'multicla