In [1]:
### TPOT ###

# Importing necessary tools and libraries
import tpot
from tpot import TPOTClassifier
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Fixing one seed for every stochastic step in this cell, so that
# Restart & Run All reproduces the same split, search and predictions
SEED = 1

# Initializing our TPOT pipeline optimizer
# (random_state seeds the evolutionary search; without it each run may
# settle on a different "best" pipeline)
pipeline_optimizer = TPOTClassifier(generations=5,
                                    verbosity=2,
                                    config_dict="TPOT light",
                                    random_state=SEED)

# Loading a dataset for training
data = datasets.load_breast_cancer()

# Splitting our data into train and test sets
# (80/20 split, stratified on the target; random_state pins which rows
# end up in the held-out test set)
X_train, X_test, y_train, y_test = train_test_split(data["data"],
                                                    data["target"],
                                                    test_size=0.2,
                                                    stratify=data["target"],
                                                    random_state=SEED)

# Training the AutoML algorithm
pipeline_optimizer.fit(X_train, y_train)


# Predicting class labels (the integer codes stored in data["target"])
y_pred = pipeline_optimizer.predict(X_test)

# Viewing the first ten predictions
print(y_pred[:10])

# Exporting the best pipeline found as a Python script
pipeline_optimizer.export('tpot_pipeline.py')

Optimization Progress:   0%|          | 0/600 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 0.9714285714285713

Generation 2 - Current best internal CV score: 0.9714285714285713

Generation 3 - Current best internal CV score: 0.9714285714285715

Generation 4 - Current best internal CV score: 0.9714285714285715

Generation 5 - Current best internal CV score: 0.9714285714285715

Best pipeline: LogisticRegression(VarianceThreshold(RobustScaler(input_matrix), threshold=0.001), C=0.5, dual=False, penalty=l2)
[1 1 0 1 1 1 1 1 1 0]


In [2]:
### H2O AutoML ###

# Importing necessary tools
import h2o
from h2o.automl import H2OAutoML
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Fixing one seed for every stochastic step in this cell. The value 1 is the
# seed the AutoML object was already using; it is now shared with the split.
SEED = 1

# Initializing our H2O cluster
h2o.init(name="h2ocluster", nthreads=6)

# Loading a dataset for training
data = datasets.load_breast_cancer()

# Splitting our data into train and test sets
# (80/20 split, stratified on the target; random_state pins which rows
# end up in the held-out test set so re-runs train on the same data)
X_train, X_test, y_train, y_test = train_test_split(data["data"],
                                                    data["target"],
                                                    test_size=0.2,
                                                    stratify=data["target"],
                                                    random_state=SEED)

# Creating H2OFrame objects for train data
feature_names = list(data["feature_names"])
train_features = h2o.H2OFrame(X_train, column_names=feature_names)
train_labels = h2o.H2OFrame(y_train, column_names=["target"])

# Creating H2OFrame objects for test data
test_features = h2o.H2OFrame(X_test, column_names=feature_names)
test_labels = h2o.H2OFrame(y_test, column_names=["target"])

# Joining our features and labels column-wise
train_frame = train_features.cbind(train_labels)
test_frame = test_features.cbind(test_labels)

# Specifying feature and target names for training
# (every column except the target is a predictor)
y = "target"
x = [column for column in train_frame.columns if column != y]

# Converting labels to categoricals so H2O treats this as classification
train_frame[y] = train_frame[y].asfactor()
test_frame[y] = test_frame[y].asfactor()

# Creating our AutoML object
# NOTE(review): the run is bounded by a time budget (max_runtime_secs), so the
# set of models trained can still vary with machine speed even with a seed;
# H2O's documentation recommends max_models when exact reproducibility matters.
aml = H2OAutoML(seed=SEED, max_runtime_secs=300)

# Training the AutoML object
aml.train(x=x, y=y, training_frame=train_frame)

# Obtaining the best model
best_model = aml.leader # Equivalent to aml.get_best_model()

# Doing inference with the best model
predictions = best_model.predict(test_frame)

# Viewing predictions
print(predictions)

# Saving our model (force=True overwrites an existing file of the same name)
# NOTE(review): this is a hardcoded POSIX-style path, while the recorded run
# was on Windows; consider a configurable output directory.
model_path = h2o.save_model(model=best_model,
                            path="/tmp/leader_model",
                            force=True)

Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
; Java HotSpot(TM) 64-Bit Server VM (build 25.301-b09, mixed mode)
  Starting server from C:\Users\tigra\AppData\Local\Programs\Python\Python39\Lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: C:\Users\tigra\AppData\Local\Temp\tmp4oksfd45
  JVM stdout: C:\Users\tigra\AppData\Local\Temp\tmp4oksfd45\h2o_tigra_started_from_python.out
  JVM stderr: C:\Users\tigra\AppData\Local\Temp\tmp4oksfd45\h2o_tigra_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,02 secs
H2O_cluster_timezone:,Asia/Yerevan
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.32.1.7
H2O_cluster_version_age:,1 month and 15 days
H2O_cluster_name:,h2ocluster
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.545 Gb
H2O_cluster_total_cores:,12
H2O_cluster_allowed_cores:,6


Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
AutoML progress: |
17:41:34.466: AutoML: XGBoost is not available; skipping it.

████████████████████████████████████████████████████████| 100%
gbm prediction progress: |████████████████████████████████████████████████| 100%


predict,p0,p1
1,0.000151923,0.999848
0,0.00766983,0.99233
1,4.41217e-05,0.999956
0,0.0243219,0.975678
1,0.00631126,0.993689
0,0.517591,0.482409
1,4.28303e-05,0.999957
0,0.999843,0.000157158
1,0.00032014,0.99968
1,4.75229e-05,0.999952



