In [1]:
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [2]:
dataset= pd.read_csv("NASDAQ.csv")

In [3]:
dataset

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1985-01-02,247.100006,247.100006,245.899994,245.899994,245.899994,48210000
1,1985-01-03,246.000000,246.699997,246.000000,246.399994,246.399994,52250000
2,1985-01-04,246.100006,246.100006,245.800003,246.100006,246.100006,55800000
3,1985-01-07,246.000000,246.199997,245.899994,245.899994,245.899994,53200000
4,1985-01-08,246.100006,246.399994,246.000000,246.000000,246.000000,64850000
...,...,...,...,...,...,...,...
8941,2020-06-23,10130.830078,10221.849609,10112.440430,10131.370117,10131.370117,5712450000
8942,2020-06-24,10092.919922,10137.500000,9842.219727,9909.169922,9909.169922,5549440000
8943,2020-06-25,9899.360352,10023.280273,9810.469727,10017.000000,10017.000000,4709620000
8944,2020-06-26,9995.120117,10000.669922,9749.070313,9757.219727,9757.219727,7279230000


In [3]:
df = pd.DataFrame(dataset)

In [4]:
import numpy as np

In [5]:
#Removing unneccessary columns
df1=df.drop(['Date','Open','High','Low','Close'], axis=1)

In [6]:
#Creating 'Today' column based on Percentage Change
df1['Today'] = df1['Adj Close'].pct_change(1)

In [7]:
df1.loc[df1['Today'] > 0 , 'Direction'] = 1
df1.loc[df1['Today'] < 0 , 'Direction'] = 0

In [8]:
data = df1.drop(['Adj Close','Volume'], axis=1)

In [9]:
data

Unnamed: 0,Today,Direction
0,,
1,-0.004838,0.0
2,-0.005408,0.0
3,0.003421,1.0
4,-0.001522,0.0
...,...,...
8941,0.004307,1.0
8942,-0.025855,0.0
8943,0.010959,1.0
8944,-0.024227,0.0


In [10]:
df3 = data.dropna()

In [11]:
conda install -c conda-forge imbalanced-learn

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.


Note: you may need to restart the kernel to use updated packages.


In [14]:
from imblearn.over_sampling import SMOTE

In [15]:
df3['Direction'].value_counts()

1.0    4824
0.0    4112
Name: Direction, dtype: int64

In [16]:
X = df3.drop(['Direction'], axis=1)
Y = df3['Direction']

In [17]:
# Implementing Oversampling for Handling Imbalanced 
oversample = SMOTE()
X_res,Y_res=oversample.fit_sample(X,Y)



In [18]:
X_res.shape,Y_res.shape

((9648, 1), (9648,))

In [19]:
from collections import Counter
print('Original dataset shape {}'.format(Counter(Y)))
print('Resampled dataset shape {}'.format(Counter(Y_res)))

Original dataset shape Counter({1.0: 4824, 0.0: 4112})
Resampled dataset shape Counter({0.0: 4824, 1.0: 4824})


In [20]:
X1 = pd.DataFrame(X_res)
Y1 = pd.DataFrame(Y_res)

In [21]:
data2 = pd.concat([X1,Y1],axis=1)

In [22]:
data3 = pd.DataFrame(data2)

In [23]:
data3

Unnamed: 0,0,0.1
0,-0.004838,0.0
1,-0.005408,0.0
2,0.003421,1.0
3,-0.001522,0.0
4,0.007256,1.0
...,...,...
9643,-0.006881,0.0
9644,-0.000089,0.0
9645,-0.006876,0.0
9646,-0.006723,0.0


In [24]:
data3.columns= ['Today','Direction']

In [25]:
#Creating lag features required for Time Series Analysis
data3['lag_1'] = data3['Today'].shift(1)
data3['lag_2'] = data3['Today'].shift(2)
data3['lag_3'] = data3['Today'].shift(3)
data3['lag_4'] = data3['Today'].shift(4)
data3['lag_5'] = data3['Today'].shift(5)

In [26]:
data3

Unnamed: 0,Today,Direction,lag_1,lag_2,lag_3,lag_4,lag_5
0,-0.004838,0.0,,,,,
1,-0.005408,0.0,-0.004838,,,,
2,0.003421,1.0,-0.005408,-0.004838,,,
3,-0.001522,0.0,0.003421,-0.005408,-0.004838,,
4,0.007256,1.0,-0.001522,0.003421,-0.005408,-0.004838,
...,...,...,...,...,...,...,...
9643,-0.006881,0.0,-0.002165,-0.004431,-0.029119,-0.006233,-0.001443
9644,-0.000089,0.0,-0.006881,-0.002165,-0.004431,-0.029119,-0.006233
9645,-0.006876,0.0,-0.000089,-0.006881,-0.002165,-0.004431,-0.029119
9646,-0.006723,0.0,-0.006876,-0.000089,-0.006881,-0.002165,-0.004431


In [27]:
df4 = data3.dropna()

In [28]:
df4

Unnamed: 0,Today,Direction,lag_1,lag_2,lag_3,lag_4,lag_5
5,0.018949,1.0,0.007256,-0.001522,0.003421,-0.005408,-0.004838
6,-0.002377,0.0,0.018949,0.007256,-0.001522,0.003421,-0.005408
7,0.015484,1.0,-0.002377,0.018949,0.007256,-0.001522,0.003421
8,0.001759,1.0,0.015484,-0.002377,0.018949,0.007256,-0.001522
9,0.002225,1.0,0.001759,0.015484,-0.002377,0.018949,0.007256
...,...,...,...,...,...,...,...
9643,-0.006881,0.0,-0.002165,-0.004431,-0.029119,-0.006233,-0.001443
9644,-0.000089,0.0,-0.006881,-0.002165,-0.004431,-0.029119,-0.006233
9645,-0.006876,0.0,-0.000089,-0.006881,-0.002165,-0.004431,-0.029119
9646,-0.006723,0.0,-0.006876,-0.000089,-0.006881,-0.002165,-0.004431


In [29]:
import numpy as np
#Correlation plot in creative way
rs = np.random.RandomState(0)
corr = df4.corr()
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,Today,Direction,lag_1,lag_2,lag_3,lag_4,lag_5
Today,1.0,0.653169,-0.033939,0.0109958,0.0253514,0.00214263,0.0258191
Direction,0.653169,1.0,0.0134789,0.0376661,0.0391709,0.0513809,0.0425085
lag_1,-0.033939,0.0134789,1.0,-0.0340444,0.0109224,0.0253251,0.00202026
lag_2,0.0109958,0.0376661,-0.0340444,1.0,-0.0340831,0.0109284,0.0252965
lag_3,0.0253514,0.0391709,0.0109224,-0.0340831,1.0,-0.0340973,0.0108808
lag_4,0.00214263,0.0513809,0.0253251,0.0109284,-0.0340973,1.0,-0.0340778
lag_5,0.0258191,0.0425085,0.00202026,0.0252965,0.0108808,-0.0340778,1.0


In [30]:
df5 = df4.drop(['Today'],axis=1)

In [31]:
#Creating data for RapidMiner
from sklearn.preprocessing import MinMaxScaler

In [32]:
scaling = MinMaxScaler()
normalised = scaling.fit_transform(df5)

In [33]:
X_resDF= pd.DataFrame(X_res)

In [34]:
Y_resDF= pd.DataFrame(Y_res)

In [35]:
normalisedDF = pd.DataFrame(normalised)

In [36]:
normalisedDF

Unnamed: 0,0,1,2,3,4,5
0,1.0,0.661297,0.633904,0.649330,0.621779,0.623559
1,0.0,0.697783,0.661297,0.633904,0.649330,0.621779
2,1.0,0.631238,0.697783,0.661297,0.633904,0.649330
3,1.0,0.686972,0.631238,0.697783,0.661297,0.633904
4,1.0,0.644144,0.686972,0.631238,0.697783,0.661297
...,...,...,...,...,...,...
9638,0.0,0.631898,0.624829,0.547791,0.619204,0.634152
9639,0.0,0.617183,0.631898,0.624829,0.547791,0.619204
9640,0.0,0.638375,0.617183,0.631898,0.624829,0.547791
9641,0.0,0.617199,0.638375,0.617183,0.631898,0.624829


In [37]:
normalisedDF.columns = ['Direction','Lag_1','Lag_2','Lag_3','Lag_4','Lag_5']

In [38]:
X= normalisedDF.drop(['Direction'], axis=1)
Y= normalisedDF['Direction']

In [39]:
normalisedDF

Unnamed: 0,Direction,Lag_1,Lag_2,Lag_3,Lag_4,Lag_5
0,1.0,0.661297,0.633904,0.649330,0.621779,0.623559
1,0.0,0.697783,0.661297,0.633904,0.649330,0.621779
2,1.0,0.631238,0.697783,0.661297,0.633904,0.649330
3,1.0,0.686972,0.631238,0.697783,0.661297,0.633904
4,1.0,0.644144,0.686972,0.631238,0.697783,0.661297
...,...,...,...,...,...,...
9638,0.0,0.631898,0.624829,0.547791,0.619204,0.634152
9639,0.0,0.617183,0.631898,0.624829,0.547791,0.619204
9640,0.0,0.638375,0.617183,0.631898,0.624829,0.547791
9641,0.0,0.617199,0.638375,0.617183,0.631898,0.624829


In [40]:
normalisedDF.to_csv('SPnewData.csv')

In [41]:
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_validate

In [42]:
#from sklearn.metrics import confusion_matrix

#confusion_matrix(y_true, y_pred)

In [43]:
scoring = {'accuracy':make_scorer(accuracy_score),
           'precision':make_scorer(precision_score),
           'recall':make_scorer(recall_score), 
           'f1_score':make_scorer(f1_score)}

In [44]:
from xgboost import XGBClassifier

In [45]:
# Instantiate the machine learning classifiers
model= DecisionTreeClassifier(criterion='entropy', max_depth=7)
AdaBoostModel = AdaBoostClassifier(base_estimator=model, n_estimators=50, learning_rate=0.1)
GradientBoostModel = GradientBoostingClassifier(n_estimators=50, learning_rate=0.1, max_depth=7, random_state=0)
XGBoostModel = XGBClassifier(n_estimators=100,learning_rate=0.1)

In [46]:
# Define the models evaluation function
def models_evaluation(X, Y, folds):
    
    '''
    X : data set features
    Y : data set target
    folds : number of cross-validation folds
    
    '''
    
    # Perform cross-validation to each machine learning classifier
    Ada = cross_validate(AdaBoostModel, X, Y, cv=folds, scoring=scoring)
    Grad = cross_validate(GradientBoostModel, X, Y, cv=folds, scoring=scoring)
    XG = cross_validate(XGBoostModel, X, Y, cv=folds, scoring=scoring)
    

    # Create a data frame with the models perfoamnce metrics scores
    models_scores_table = pd.DataFrame({'Ada Boost Classifier':[Ada['test_accuracy'].mean(),
                                                               Ada['test_precision'].mean(),
                                                               Ada['test_recall'].mean(),
                                                               Ada['test_f1_score'].mean()],
                                       
                                      'Gradient Boost Classifier':[Grad['test_accuracy'].mean(),
                                                                   Grad['test_precision'].mean(),
                                                                   Grad['test_recall'].mean(),
                                                                   Grad['test_f1_score'].mean()],
                                       
                                      'XG Boost Classifier':[XG['test_accuracy'].mean(),
                                                       XG['test_precision'].mean(),
                                                       XG['test_recall'].mean(),
                                                       XG['test_f1_score'].mean()]},
                                       
                                      
                                      
                                      index= ['Accuracy', 'Precision', 'Recall', 'F1 Score'])
    
    # Add 'Best Score' column
    models_scores_table['Best Score'] = models_scores_table.idxmax(axis=1)
    
    # Return models performance metrics scores data frame
    return(models_scores_table)
  
# Run models_evaluation function
models_evaluation(X, Y, 10)

Unnamed: 0,Ada Boost Classifier,Gradient Boost Classifier,XG Boost Classifier,Best Score
Accuracy,0.512401,0.527646,0.520492,Gradient Boost Classifier
Precision,0.526882,0.552211,0.546795,Gradient Boost Classifier
Recall,0.591657,0.656977,0.648476,Gradient Boost Classifier
F1 Score,0.550205,0.584544,0.57812,Gradient Boost Classifier


In [47]:
pip install h2o

Note: you may need to restart the kernel to use updated packages.


In [48]:
import h2o
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
from h2o.grid.grid_search import H2OGridSearch
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,2 hours 1 min
H2O_cluster_timezone:,Europe/London
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.30.0.7
H2O_cluster_version_age:,24 days
H2O_cluster_name:,H2O_from_python_User_duacu9
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,1.797 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


In [49]:
from sklearn.model_selection import train_test_split

In [50]:
# create training and testing vars
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
print (X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)

(7714, 5) (7714,)
(1929, 5) (1929,)


In [51]:
Y_train = Y_train.astype('category')
Y_test = Y_test.astype('category')

In [52]:
nfolds = 5

In [53]:
train_data = pd.concat([X_train,Y_train],axis=1)

In [54]:
test_data = pd.concat([X_test,Y_test],axis=1)

In [55]:
train= h2o.H2OFrame(train_data)
test= h2o.H2OFrame(test_data)

Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%


In [56]:
train

Lag_1,Lag_2,Lag_3,Lag_4,Lag_5,Direction
0.651104,0.636272,0.653414,0.613854,0.683616,0
0.645356,0.651732,0.639311,0.644137,0.668151,1
0.615349,0.68522,0.594082,0.633738,0.646581,0
0.616659,0.626411,0.655753,0.64071,0.659492,1
0.68184,0.656713,0.612734,0.627665,0.679185,1
0.558672,0.624726,0.678729,0.643781,0.643413,0
0.619356,0.641674,0.642069,0.637567,0.691954,0
0.62037,0.628904,0.600737,0.631206,0.632383,0
0.63468,0.568861,0.609317,0.587127,0.631218,0
0.623406,0.629354,0.642702,0.608844,0.639983,1




In [57]:
x=train.columns

In [58]:
y="Direction"

In [59]:
x.remove(y)

In [60]:
# For binary classification, response should be a factor
train[y] = train[y].asfactor()
test[y] = test[y].asfactor()

In [61]:
#1. Generate a 2-model ensemble (GBM + RF)

# Train and cross-validate a GBM
my_gbm = H2OGradientBoostingEstimator(distribution="bernoulli",
                                      ntrees=10,
                                      max_depth=3,
                                      min_rows=2,
                                      learn_rate=0.2,
                                      nfolds=nfolds,
                                      fold_assignment="Modulo",
                                      keep_cross_validation_predictions=True,
                                      seed=1)
my_gbm.train(x=x, y=y, training_frame=train)

gbm Model Build progress: |███████████████████████████████████████████████| 100%


In [62]:
# Train and cross-validate a RF
my_rf = H2ORandomForestEstimator(ntrees=50,
                                 nfolds=nfolds,
                                 fold_assignment="Modulo",
                                 keep_cross_validation_predictions=True,
                                 seed=1)
my_rf.train(x=x, y=y, training_frame=train)

drf Model Build progress: |███████████████████████████████████████████████| 100%


In [63]:
# Train a stacked ensemble using the GBM and GLM above
ensemble = H2OStackedEnsembleEstimator(model_id="my_ensemble_binomial",
                                       base_models=[my_gbm, my_rf])
ensemble.train(x=x, y=y, training_frame=train)

# Eval ensemble performance on the test data
perf_stack_test = ensemble.model_performance(test)

stackedensemble Model Build progress: |███████████████████████████████████| 100%


In [64]:
# Compare to base learner performance on the test set
perf_gbm_test = my_gbm.model_performance(test)
perf_rf_test = my_rf.model_performance(test)
baselearner_best_auc_test = max(perf_gbm_test.auc(), perf_rf_test.auc())
stack_auc_test = perf_stack_test.auc()
print("Best Base-learner Test AUC:  {0}".format(baselearner_best_auc_test))
print("Ensemble Test AUC:  {0}".format(stack_auc_test))

Best Base-learner Test AUC:  0.5810841050710916
Ensemble Test AUC:  0.5653784168760974


In [65]:
# Generate predictions on a test set
pred = ensemble.predict(test)

stackedensemble prediction progress: |████████████████████████████████████| 100%


In [66]:
ensemble.confusion_matrix

Model Details
H2OStackedEnsembleEstimator :  Stacked Ensemble
Model Key:  my_ensemble_binomial

No model summary for this model

ModelMetricsBinomialGLM: stackedensemble
** Reported on train data. **

MSE: 0.16702383807525742
RMSE: 0.4086855002018758
LogLoss: 0.5207595543990732
Null degrees of freedom: 7713
Residual degrees of freedom: 7711
Null deviance: 10693.468164533364
Residual deviance: 8034.278405268901
AIC: 8040.278405268901
AUC: 0.9838342353098164
AUCPR: 0.9894295932779709
Gini: 0.9676684706196328

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.5291436388911758: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,3746.0,83.0,0.0217,(83.0/3829.0)
1,1,204.0,3681.0,0.0525,(204.0/3885.0)
2,Total,3950.0,3764.0,0.0372,(287.0/7714.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.529144,0.962479,174.0
1,max f2,0.504843,0.962018,193.0
2,max f0point5,0.539984,0.976416,167.0
3,max accuracy,0.529144,0.962795,174.0
4,max precision,0.826106,1.0,0.0
5,max recall,0.308503,1.0,350.0
6,max specificity,0.826106,1.0,0.0
7,max absolute_mcc,0.529144,0.926059,174.0
8,max min_per_class_accuracy,0.520505,0.958816,181.0
9,max mean_per_class_accuracy,0.529144,0.962907,174.0



Gains/Lift Table: Avg response rate: 50.36 %, avg score: 50.41 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.010111,0.698505,1.985586,1.985586,1.0,0.72842,1.0,0.72842,0.020077,0.020077,98.558559,98.558559,0.020077
1,2,0.020093,0.678951,1.985586,1.985586,1.0,0.687309,1.0,0.707997,0.01982,0.039897,98.558559,98.558559,0.039897
2,3,0.030075,0.671216,1.985586,1.985586,1.0,0.675412,1.0,0.697182,0.01982,0.059717,98.558559,98.558559,0.059717
3,4,0.040057,0.664722,1.985586,1.985586,1.0,0.66803,1.0,0.689918,0.01982,0.079537,98.558559,98.558559,0.079537
4,5,0.050039,0.658608,1.985586,1.985586,1.0,0.661383,1.0,0.684226,0.01982,0.099356,98.558559,98.558559,0.099356
5,6,0.100078,0.639575,1.985586,1.985586,1.0,0.64811,1.0,0.666168,0.099356,0.198713,98.558559,98.558559,0.198713
6,7,0.149987,0.627232,1.985586,1.985586,1.0,0.633279,1.0,0.655224,0.099099,0.297812,98.558559,98.558559,0.297812
7,8,0.200026,0.616074,1.985586,1.985586,1.0,0.62159,1.0,0.64681,0.099356,0.397169,98.558559,98.558559,0.397169
8,9,0.299974,0.595963,1.985586,1.985586,1.0,0.605627,1.0,0.633088,0.198456,0.595624,98.558559,98.558559,0.595624
9,10,0.400052,0.570999,1.983014,1.984942,0.998705,0.58409,0.999676,0.620831,0.198456,0.79408,98.301358,98.494217,0.793819





<bound method H2OBinomialModel.confusion_matrix of >

In [67]:
# 2. Generate a random grid of models and stack them together
# Specify GBM hyperparameters for the grid
hyper_params = {"learn_rate": [0.01, 0.03],
                "max_depth": [3, 4, 5, 6, 9],
                "sample_rate": [0.7, 0.8, 0.9, 1.0],
                "col_sample_rate": [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]}
search_criteria = {"strategy": "RandomDiscrete", "max_models": 3, "seed": 1}

# Train the grid
grid = H2OGridSearch(model=H2OGradientBoostingEstimator(ntrees=10,
                                                        seed=1,
                                                        nfolds=nfolds,
                                                        fold_assignment="Modulo",
                                                        keep_cross_validation_predictions=True),
                     hyper_params=hyper_params,
                     search_criteria=search_criteria,
                     grid_id="gbm_grid_binomial")
grid.train(x=x, y=y, training_frame=train)

# Train a stacked ensemble using the GBM grid
ensemble = H2OStackedEnsembleEstimator(model_id="my_ensemble_gbm_grid_binomial",
                                       base_models=grid.model_ids)
ensemble.train(x=x, y=y, training_frame=train)

# Eval ensemble performance on the test data
perf_stack_test = ensemble.model_performance(test)

# Compare to base learner performance on the test set
baselearner_best_auc_test = max([h2o.get_model(model).model_performance(test_data=test).auc() for model in grid.model_ids])
stack_auc_test = perf_stack_test.auc()
print("Best Base-learner Test AUC:  {0}".format(baselearner_best_auc_test))
print("Ensemble Test AUC:  {0}".format(stack_auc_test))

# Generate predictions on a test set (if neccessary)
pred = ensemble.predict(test)

gbm Grid Build progress: |████████████████████████████████████████████████| 100%
stackedensemble Model Build progress: | (failed)


OSError: Job with key $03017f00000132d4ffffffff$_acee49d48e91680016cd80d6b634583 failed with an exception: water.exceptions.H2OIllegalArgumentException: Base models are inconsistent: they use different size (number of rows) training frames. Found: 7714 (StackedEnsemble) and 7960 (model gbm_grid_binomial_model_8).
stacktrace: 
water.exceptions.H2OIllegalArgumentException: Base models are inconsistent: they use different size (number of rows) training frames. Found: 7714 (StackedEnsemble) and 7960 (model gbm_grid_binomial_model_8).
	at hex.ensemble.StackedEnsembleModel.checkAndInheritModelProperties(StackedEnsembleModel.java:493)
	at hex.ensemble.StackedEnsemble$StackedEnsembleDriver.computeImpl(StackedEnsemble.java:289)
	at hex.ModelBuilder$Driver.compute2(ModelBuilder.java:252)
	at water.H2O$H2OCountedCompleter.compute(H2O.java:1557)
	at jsr166y.CountedCompleter.exec(CountedCompleter.java:468)
	at jsr166y.ForkJoinTask.doExec(ForkJoinTask.java:263)
	at jsr166y.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:974)
	at jsr166y.ForkJoinPool.runWorker(ForkJoinPool.java:1477)
	at jsr166y.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:104)


In [None]:
perf_stack_test.auc()

In [68]:
from h2o.automl import H2OAutoML

In [70]:
# Run AutoML for 20 base models (limited to 1 hour max runtime by default)
aml = H2OAutoML(max_models=20, seed=1)
aml.train(x=x, y=y, training_frame=train)

AutoML progress: |█
19:44:09.645: AutoML: XGBoost is not available; skipping it.

█████████████████████████████████████████████████████████ (cancelled)  99%


H2OJobCancelled: Job<$03017f00000132d4ffffffff$_85043d13069d5ea8d0350155fc954453> was cancelled by the user.

In [None]:
# View the AutoML Leaderboard
lb = aml.leaderboard
lb.head(rows=lb.nrows)