## Default AutoGluon without Intel® Extension for Scikit-learn

This notebook uses default AutoGluon without Intel® Extension for Scikit-learn (sklearnex). It serves as the baseline for another [notebook](https://www.kaggle.com/alex97andreev/tps-jun-autogluon-with-sklearnex).

In [None]:
!pip install autogluon.tabular[all] -q --progress-bar off

In [None]:
import pandas as pd
from timeit import default_timer as timer

In [None]:
# Settings reused by later cells: the seed handed to the models and the
# name of the column to predict.
random_state = 42
label = 'target'

# Folder under ../input that holds the competition CSV files.
competition_prefix = 'tabular-playground-series-jun-2021'

# Read the three competition tables, each indexed by its `id` column.
train_data, test_data, sample_submission = (
    pd.read_csv(f'../input/{competition_prefix}/{table_name}.csv', index_col='id')
    for table_name in ('train', 'test', 'sample_submission')
)

In [None]:
from autogluon.tabular import TabularPredictor


# Training budget handed to AutoGluon, in seconds (8 hours).
time_limit = 8 * 60 * 60

# Model configuration for TabularPredictor.fit, copied from the defaults documented at
# https://auto.gluon.ai/stable/api/autogluon.predictor.html#autogluon.tabular.TabularPredictor.fit
# and modified to pass `random_state` as the seed where one is given and to
# set n_neighbors explicitly for the KNN models.
hyperparameters = {
    'NN': {},
    'GBM': [
        {'extra_trees': True, 'seed': random_state, 'ag_args': {'name_suffix': 'XT'}},
        {},
        'GBMLarge',
    ],
    'CAT': {'random_seed': random_state},
    'XGB': {'seed': random_state},
    'FASTAI': {},
    # 'RF' and 'XT' receive the same three per-criterion variants; each key
    # gets its own freshly built list of dicts (nothing is shared between them).
    **{
        forest_key: [
            {'criterion': criterion, 'random_state': random_state,
             'ag_args': {'name_suffix': suffix, 'problem_types': problem_types}}
            for criterion, suffix, problem_types in (
                ('gini', 'Gini', ['binary', 'multiclass']),
                ('entropy', 'Entr', ['binary', 'multiclass']),
                ('mse', 'MSE', ['regression']),
            )
        ]
        for forest_key in ('RF', 'XT')
    },
    # One KNN entry per neighbour-weighting scheme.
    'KNN': [
        {'weights': weights, 'n_neighbors': 1000, 'ag_args': {'name_suffix': suffix}}
        for weights, suffix in (('uniform', 'Unif'), ('distance', 'Dist'))
    ],
}

# Measure the elapsed time of predictor construction plus the full fit call.
t0 = timer()

# Configure the predictor for the target column, scored with log loss.
autogluon_predictor = TabularPredictor(
    label=label,
    eval_metric="log_loss",
    learner_kwargs={'ignored_columns': ['id']},
)

# Train within the time budget using the hyperparameters defined above;
# the name is rebound to whatever fit() returns.
autogluon_predictor = autogluon_predictor.fit(
    train_data=train_data,
    time_limit=time_limit,
    presets='best_quality',
    verbosity=0,
    hyperparameters=hyperparameters,
)

t1 = timer()
fitting_time = t1 - t0

# Keep the leaderboard so a later cell can display it.
leaderboard = autogluon_predictor.leaderboard()

In [None]:
# Show the leaderboard obtained from autogluon_predictor.leaderboard() in the training cell.
leaderboard

In [None]:
# The predictions are relabelled below with the sample submission's index by
# position, so the test rows must be in the same id order as the sample
# submission. Fail loudly here instead of writing a silently misaligned file.
assert test_data.index.equals(sample_submission.index), (
    'test.csv and sample_submission.csv ids are not in the same order'
)

# Time only the probability prediction on the test set.
t0 = timer()
predictions = autogluon_predictor.predict_proba(test_data)
t1 = timer()
prediction_time = t1 - t0

# Give the output the sample submission's column names and id index.
# NOTE(review): column names are assigned by position; this assumes predict_proba
# returns the classes in the same order as the sample submission columns — confirm.
predictions.columns = list(sample_submission.columns)
predictions.index = sample_submission.index
predictions.to_csv('tps_jun_2021_autogluon_submission.csv')

predictions.head()

In [None]:
# Report both measured durations, rounded to three decimals, one per line.
print(
    *(
        f'{caption} {round(seconds, 3)}'
        for caption, seconds in (
            ('Fitting time[s]:', fitting_time),
            ('Prediction time[s]:', prediction_time),
        )
    ),
    sep='\n',
)

In [None]:
# Remove the AutogluonModels directory from the working directory.
# shutil.rmtree replaces the shell `rm -rf`, so the cleanup does not depend on
# a POSIX shell; ignore_errors=True makes it a no-op when the directory is absent.
import shutil

shutil.rmtree('AutogluonModels', ignore_errors=True)