# Import AutoGluon

In [None]:
import warnings
# Install a blanket "ignore" filter so that, from this point on, warnings of
# every category are suppressed (presumably to keep cell output readable).
warnings.filterwarnings('ignore')

In [None]:
from autogluon.tabular import TabularDataset, TabularPredictor
from autogluon.core.utils import generate_train_test_split

# Download Data and Create TabularDataset Object

In [None]:
# S3 prefix under which the airlines dataset files live; the training data
# is the parquet file directly below it.
path_prefix = 'https://autogluon.s3.amazonaws.com/datasets/airlines/all/'
path_train = f'{path_prefix}train_data.parquet'

https://autogluon.s3.amazonaws.com/datasets/airlines/all/train_data.parquet

# Build the TabularDataset from the location held in path_train.
data = TabularDataset(path_train)

In [None]:
# Bare expression: in a notebook this renders the dataset as the cell output.
data

# Randomly Sample 1 Million Flights

In [None]:
# Name of the column that holds the prediction label.
LABEL = 'target'
# Maximum number of rows to keep when sub-sampling; None disables sampling.
SAMPLE = 1_000_000

In [None]:
# Down-sample only when a cap is configured and the dataset is larger than
# it; the fixed seed keeps the drawn subset reproducible across runs.
if SAMPLE is not None and len(data) > SAMPLE:
    data = data.sample(n=SAMPLE, random_state=0)

In [None]:
# Bare expression: shows (rows, columns) of the dataset after sampling.
data.shape

# Split Train and Test Data

In [None]:
# Hold out 10% of the rows as a test set. The helper takes the features and
# the label column separately and returns them separately as well.
train_data, test_data, train_labels, test_labels = generate_train_test_split(
    X=data.drop(columns=LABEL),
    y=data[LABEL],
    problem_type='binary',
    test_size=0.1,
)
# Re-attach the labels so each frame carries its own LABEL column again.
train_data[LABEL] = train_labels
test_data[LABEL] = test_labels

# Run AutoGluon with Multiple RAPIDS Models

In [None]:
# The RAPIDS-backed model classes are imported from their modules directly
# and used as keys of the `hyperparameters` dict below.
from autogluon.tabular.models.rf.rf_rapids_model import RFRapidsModel
from autogluon.tabular.models.knn.knn_rapids_model import KNNRapidsModel
from autogluon.tabular.models.lr.lr_rapids_model import LinearRapidsModel

# Create a predictor for the LABEL column and fit it on the training split
# with an explicit set of models: three RAPIDS model classes plus XGBoost.
predictor = TabularPredictor(
    label=LABEL,
    verbosity=3,
).fit(
    train_data=train_data,
    hyperparameters={        
        # An empty dict passes no model-specific overrides for that model.
        KNNRapidsModel : {},
        LinearRapidsModel : {},
        RFRapidsModel : {'n_estimators': 100},
        # XGBoost entry configured to train on one GPU.
        # NOTE(review): 'gpu_hist' is deprecated in XGBoost >= 2.0 in favour
        # of device='cuda' with tree_method='hist' — confirm against the
        # installed XGBoost version before changing.
        # NOTE(review): 'ag.early_stop' presumably sets the early-stopping
        # patience in rounds — confirm against the AutoGluon docs.
        'XGB': {'ag_args_fit': {'num_gpus': 1},  'tree_method': 'gpu_hist', 'ag.early_stop': 10000},
    },
    # Overall training budget; presumably in seconds — confirm against the
    # TabularPredictor.fit documentation.
    time_limit=2000,
)

# Leaderboard requested without a dataset argument (presumably reports the
# scores from the predictor's internal validation data — confirm).
leaderboard = predictor.leaderboard()

# Leaderboard computed with the held-out test_data. This rebinds
# `leaderboard`, replacing the result of the call above.
leaderboard = predictor.leaderboard(test_data)