Initial Imports

In [18]:
import h2o
from h2o.automl import H2OAutoML
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score
from h2o.estimators import H2ODeepLearningEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator

# Server Start

In [19]:
h2o.init(min_mem_size='2G')

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.24" 2024-07-16; OpenJDK Runtime Environment (build 11.0.24+8-post-Ubuntu-1ubuntu322.04); OpenJDK 64-Bit Server VM (build 11.0.24+8-post-Ubuntu-1ubuntu322.04, mixed mode, sharing)
  Starting server from /home/markel/.local/lib/python3.10/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpm4mlzs66
  JVM stdout: /tmp/tmpm4mlzs66/h2o_markel_started_from_python.out
  JVM stderr: /tmp/tmpm4mlzs66/h2o_markel_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,02 secs
H2O_cluster_timezone:,Europe/Madrid
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.1
H2O_cluster_version_age:,4 months and 30 days
H2O_cluster_name:,H2O_from_python_markel_dfnt8p
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.857 Gb
H2O_cluster_total_cores:,8
H2O_cluster_allowed_cores:,8


## Training of the initial basic model

In [20]:
def load_and_prepare_data(file_path):
    df = pd.read_csv(file_path)
    df.columns = df.columns.str.strip()
    df['Label_Binary'] = df['Label'].apply(lambda x: 'BENIGN' if x == 'BENIGN' else 'ATTACK')
    
    # Convertir 'Timestamp' a datetime y ordenar
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    df = df.sort_values(by='Timestamp')
    
    # Forzar 'Label_Binary' como categoría
    df['Label_Binary'] = df['Label_Binary'].astype('category')
    
    h2o_frame = h2o.H2OFrame(df)

    # Asegurarse de que las columnas sean categóricas donde sea necesario
    h2o_frame['Label_Binary'] = h2o_frame['Label_Binary'].asfactor()

    return h2o_frame


In [21]:
mon_tues_array = [
    "CICIDS2017/Monday-WorkingHours.pcap_ISCX.csv",
    "CICIDS2017/Tuesday-WorkingHours.pcap_ISCX.csv"
]
mon_tues_frames = [load_and_prepare_data(file) for file in mon_tues_array]
mon_tues = mon_tues_frames[0].rbind(mon_tues_frames[1])

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [22]:
wed = load_and_prepare_data("CICIDS2017/Wednesday-workingHours.pcap_ISCX.csv")


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [23]:
# Lista de predictores y respuesta
predictors = [
    "Destination Port", 
    "Protocol","Flow Duration", "Total Fwd Packets", 
    "Total Backward Packets", "Total Length of Fwd Packets", 
    "Total Length of Bwd Packets", "Fwd Packet Length Max", 
    "Fwd Packet Length Min", "Fwd Packet Length Mean", "Fwd Packet Length Std", 
    "Bwd Packet Length Max", "Bwd Packet Length Min", "Bwd Packet Length Mean", 
    "Bwd Packet Length Std", "Flow Bytes/s", "Flow Packets/s", "Flow IAT Mean", 
    "Flow IAT Std", "Flow IAT Max", "Flow IAT Min", "Fwd IAT Total", 
    "Fwd IAT Mean", "Fwd IAT Std", "Fwd IAT Max", "Fwd IAT Min", 
    "Bwd IAT Total", "Bwd IAT Mean", "Bwd IAT Std", "Bwd IAT Max", 
    "Bwd IAT Min", "Fwd PSH Flags", "Bwd PSH Flags", "Fwd URG Flags", 
    "Bwd URG Flags", "Fwd Header Length", "Bwd Header Length", 
    "Fwd Packets/s", "Bwd Packets/s", "Min Packet Length", "Max Packet Length", 
    "Packet Length Mean", "Packet Length Std", "Packet Length Variance", 
    "FIN Flag Count", "SYN Flag Count", "RST Flag Count", "PSH Flag Count", 
    "ACK Flag Count", "URG Flag Count", "CWE Flag Count", "ECE Flag Count", 
    "Down/Up Ratio", "Average Packet Size", "Avg Fwd Segment Size", 
    "Avg Bwd Segment Size", "Fwd Header Length", "Fwd Avg Bytes/Bulk", 
    "Fwd Avg Packets/Bulk", "Fwd Avg Bulk Rate", "Bwd Avg Bytes/Bulk", 
    "Bwd Avg Packets/Bulk", "Bwd Avg Bulk Rate", "Subflow Fwd Packets", 
    "Subflow Fwd Bytes", "Subflow Bwd Packets", "Subflow Bwd Bytes", 
    "Init_Win_bytes_forward", "Init_Win_bytes_backward", "act_data_pkt_fwd", 
    "min_seg_size_forward", "Active Mean", "Active Std", "Active Max", 
    "Active Min", "Idle Mean", "Idle Std", "Idle Max", "Idle Min"
]

response = "Label"
response_binary = "Label_Binary"

In [24]:
def train_automl(train, valid=None, max_runtime_secs=60):
    aml = H2OAutoML(max_runtime_secs=max_runtime_secs, seed=1234, verbosity="info", nfolds=0, keep_cross_validation_predictions=False,
                        )
    aml.train(x=predictors, y=response, training_frame=train, validation_frame=valid)
    return aml


In [25]:
def train_binary(train, valid=None, max_runtime_secs=60, checkpoint_model=None):
    if checkpoint_model is None:
        aml = H2OAutoML(max_runtime_secs=max_runtime_secs, seed=1234, verbosity="info", nfolds=0, keep_cross_validation_predictions=False,
                            include_algos=['DeepLearning', 'DRF'])
        aml.train(x=predictors, y=response_binary, training_frame=train, validation_frame=valid)
        new_model = aml.leader
    else:
        if checkpoint_model.algo == "DeepLearning":
            new_model = H2ODeepLearningEstimator(
                model_id="best_model_DL_" + str(checkpoint_model.params['epochs']['actual'] + 1),
                checkpoint=checkpoint_model.model_id,
                epochs=int(checkpoint_model.params['epochs']['actual']) + 5,
                seed=checkpoint_model.params['seed']['actual']
            )
        elif checkpoint_model.algo == "drf":
            new_model = H2ORandomForestEstimator(
                model_id="best_model_forest_" + str(checkpoint_model.params['ntrees']['actual'] + 1),
                checkpoint=checkpoint_model.model_id,
                ntrees=int(checkpoint_model.params['ntrees']['actual']) + 5,
                seed=checkpoint_model.params['seed']['actual']
            )
        # Entrenar con el conjunto de entrenamiento completo
        new_model.train(x=predictors, y=response_binary, training_frame=train)
    return new_model

In [26]:
def model_metrics_evaluation(model_predictions, ground_truth, label='ATTACK'):
    accuracy = accuracy_score(ground_truth.as_data_frame(), model_predictions.as_data_frame())
    f1 = f1_score(ground_truth.as_data_frame(), model_predictions.as_data_frame(), pos_label=label)
    recall = recall_score(ground_truth.as_data_frame(), model_predictions.as_data_frame(), pos_label=label)
    precision = precision_score(ground_truth.as_data_frame(), model_predictions.as_data_frame(), pos_label=label)
    confusion = confusion_matrix(ground_truth.as_data_frame(), model_predictions.as_data_frame(), labels=['BENIGN', 'ATTACK'])
    return accuracy, f1, recall, precision, confusion

### Pipeline

In [27]:

#Binary clasification
model = train_binary(mon_tues)
print(model)

AutoML progress: |
19:25:03.755: Project: AutoML_1_20240812_192503
19:25:03.756: Cross-validation disabled by user: no fold column nor nfolds > 1.
19:25:04.63: Setting stopping tolerance adaptively based on the training frame: 0.00101231013475982
19:25:04.63: Build control seed: 1234
19:25:04.64: Since cross-validation is disabled, and validation frame(s) were not provided, automatically split the training data into training, validation frame(s) in the ratio 90/10/0.
19:25:06.165: training frame: Frame key: AutoML_1_20240812_192503_training_py_12_sid_b9ea    cols: 86    rows: 878297  chunks: 120    size: 319111011  checksum: 2104673652179994608
19:25:06.191: validation frame: Frame key: AutoML_1_20240812_192503_validation_py_12_sid_b9ea    cols: 86    rows: 97530  chunks: 120    size: 86641026  checksum: 2161188155506823120
19:25:06.192: leaderboard frame: Frame key: AutoML_1_20240812_192503_validation_py_12_sid_b9ea    cols: 86    rows: 97530  chunks: 120    size: 86641026  checksum: 

In [28]:

predictions_wed = model.predict(wed)
accuracy, f1, recall, precision, confusion = model_metrics_evaluation(predictions_wed['predict'], wed['Label_Binary'], label='ATTACK')
print(f"Accuracy: {accuracy}")
print(f"F1: {f1}")
print(f"Recall: {recall}")
print(f"Precision: {precision}")
print("Confusion Matrix:")
print(confusion)


drf prediction progress: |███████████████████████████████████████████████████████| (done) 100%



with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_f

Accuracy: 0.6352376126565065
F1: 0.0
Recall: 0.0
Precision: 0.0
Confusion Matrix:
[[440031      0]
 [252672      0]]


In [39]:

# Supongamos que 'wed' es el DataFrame ya cargado con load_and_prepare_data
wed_df = h2o.as_list(wed, use_pandas=True)  # Convertir a Pandas DataFrame para manipulación temporal

# Convertir los Timestamps de milisegundos a datetime
wed_df['Timestamp'] = pd.to_datetime(wed_df['Timestamp'], unit='ms')

# Obtener el primer y último timestamp en formato datetime
start_time = wed_df['Timestamp'].min()
end_time = wed_df['Timestamp'].max()

# Crear rangos horarios
time_ranges = pd.date_range(start=start_time, end=end_time, freq='H')

# Dividir el DataFrame en intervalos de una hora
wed_hours = [wed_df[(wed_df['Timestamp'] >= time_ranges[i]) & (wed_df['Timestamp'] < time_ranges[i+1])] 
             for i in range(len(time_ranges)-1)]
#drop the empty hours
wed_hours = [hour for hour in wed_hours if len(hour) > 0]
# Convertir de nuevo a H2OFrames si es necesario
wed_hours_h2o = [h2o.H2OFrame(hour_df) for hour_df in wed_hours]

past_hours = []
for hour in wed_hours_h2o:
    predictions_hour = model.predict(hour)
    print(len(predictions_hour['predict']))
    accuracy, f1, recall, precision, confusion = model_metrics_evaluation(predictions_hour['predict'], hour['Label_Binary'], label='ATTACK')
    print(f"Accuracy: {accuracy}")
    print(f"F1: {f1}")
    print(f"Recall: {recall}")
    print(f"Precision: {precision}")
    print("Confusion Matrix:")
    print(confusion)
    past_hours.append(hour)
    #check if retrained needed
    if precision < 0.9 and (confusion[1][1] > 0 or confusion[1][0] > 0):
        #concatenate the past hours
        time_passed = past_hours[0]
        for i in range(1, len(past_hours)):
            time_passed = time_passed.rbind(past_hours[i])
        print(f"len of Time passed: {len(time_passed)}")
        #model accuracy on training data
        print(f"model precision on training data: {model.precision()}")
        total_time = mon_tues.rbind(time_passed)
        new_model = train_binary(total_time, checkpoint_model=model)
        print(f"model precision on training data after retraining: {new_model.precision()}")
        model = new_model
        print("Model retrained")
    print("---------------------------------------------------")



with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()



Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
drf prediction progress: |███████████████████████████████████████████████████████| (done) 100%
48902



with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_d

Accuracy: 1.0
F1: 0.0
Recall: 0.0
Precision: 0.0
Confusion Matrix:
[[48902     0]
 [    0     0]]
---------------------------------------------------
drf prediction progress: |

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()



███████████████████████████████████████████████████████| (done) 100%
63868



with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_f

Accuracy: 0.9999217135341643
F1: 0.0
Recall: 0.0
Precision: 0.0
Confusion Matrix:
[[63863     0]
 [    5     0]]
len of Time passed: 112770
model precision on training data: [[0.999998105428268, 1.0]]
drf Model Build progress: |██████████████████████████████████████████████████████| (done) 100%
model precision on training data after retraining: [[0.9999976546747106, 1.0]]
Model retrained
---------------------------------------------------
drf prediction progress: |███████████████████████████████████████████████████████| (done) 100%
66142



with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_f

Accuracy: 0.9998336911493454
F1: 0.0
Recall: 0.0
Precision: 0.0
Confusion Matrix:
[[66131     0]
 [   11     0]]
len of Time passed: 178912
model precision on training data: [[0.9999976546747106, 1.0]]
drf Model Build progress: |██████████████████████████████████████████████████████| (done) 100%
model precision on training data after retraining: [[0.9999982499311117, 1.0]]
Model retrained
---------------------------------------------------
drf prediction progress: |███████████████████████████████████████████████████████| (done) 100%
44878



with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_d

Accuracy: 1.0
F1: 0.0
Recall: 0.0
Precision: 0.0
Confusion Matrix:
[[44878     0]
 [    0     0]]
---------------------------------------------------
drf prediction progress: |

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()



███████████████████████████████████████████████████████| (done) 100%
3547
Accuracy: 1.0
F1: 0.0
Recall: 0.0
Precision: 0.0
Confusion Matrix:
[[3547    0]
 [   0    0]]
---------------------------------------------------
drf prediction progress: |


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_d

███████████████████████████████████████████████████████| (done) 100%
14107



with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()



Accuracy: 1.0
F1: 0.0
Recall: 0.0
Precision: 0.0
Confusion Matrix:
[[14107     0]
 [    0     0]]
---------------------------------------------------
drf prediction progress: |

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))

with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()



███████████████████████████████████████████████████████| (done) 100%
54304



with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_f

Accuracy: 0.9410540659988215
F1: 0.0
Recall: 0.0
Precision: 0.0
Confusion Matrix:
[[51103     0]
 [ 3201     0]]
len of Time passed: 295748
model precision on training data: [[0.9999982499311117, 1.0]]
drf Model Build progress: |██████████████████████████████████████████████████████| (done) 100%
model precision on training data after retraining: [[0.9999950402170418, 1.0]]
Model retrained
---------------------------------------------------
drf prediction progress: |███████████████████████████████████████████████████████| (done) 100%
301876



with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_f

Accuracy: 0.24001908068213437
F1: 0.0
Recall: 0.0
Precision: 0.0
Confusion Matrix:
[[ 72456      0]
 [229420      0]]
len of Time passed: 597624
model precision on training data: [[0.9999950402170418, 1.0]]
drf Model Build progress: |██████████████████████████████████████████████████████| (done) 100%
model precision on training data after retraining: [[0.9999935230522741, 1.0]]
Model retrained
---------------------------------------------------
drf prediction progress: |███████████████████████████████████████████████████████| (done) 100%
71086



with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_frame()


with h2o.utils.threading.local_context(polars_enabled=True, datatable_enabled=True):
    pandas_df = h2o_df.as_data_f

Accuracy: 0.7181582871451482
F1: 0.0
Recall: 0.0
Precision: 0.0
Confusion Matrix:
[[51051     0]
 [20035     0]]
len of Time passed: 668710
model precision on training data: [[0.9999935230522741, 1.0]]
drf Model Build progress: |██████████████████████████████████████████████████████| (done) 100%
model precision on training data after retraining: [[0.9999747287317557, 1.0]]
Model retrained
---------------------------------------------------


In [30]:
#.cluster().shutdown(prompt=False)