In [3]:
!pip install h2o

Collecting h2o
  Downloading h2o-3.44.0.3.tar.gz (265.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.2/265.2 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: h2o
  Building wheel for h2o (setup.py) ... [?25l[?25hdone
  Created wheel for h2o: filename=h2o-3.44.0.3-py2.py3-none-any.whl size=265293968 sha256=faac25fefced57889337685bf214d903f3b159a7487ae4683b58b99cca0ffa86
  Stored in directory: /root/.cache/pip/wheels/77/9a/1c/2da26f943fd46b57f3c20b54847b936b9152b831dc7447cf71
Successfully built h2o
Installing collected packages: h2o
Successfully installed h2o-3.44.0.3


In [None]:
# H2O AutoMl

import h2o
from h2o.automl import H2OAutoML
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Start H2O
h2o.init()

# dataset
df = pd.read_csv('final2.csv')

target_variable = 'DiagPeriodL90D'

train, test = train_test_split(df, test_size=0.2, random_state=42)

# Pandas DataFrame to an H2O Frame
train_h2o = h2o.H2OFrame(train)
test_h2o = h2o.H2OFrame(test)

# Defining predictors
predictors = list(df.columns[df.columns != target_variable])

#AutoML model
aml = H2OAutoML(max_runtime_secs=600)  # Adjust max_runtime_secs as needed

# Train
aml.train(x=predictors, y=target_variable, training_frame=train_h2o)

#leaderboard
lb = aml.leaderboard
print(lb)

# prediction
predictions_h2o = aml.leader.predict(test_h2o)

# H2O predictions to a Pandas DataFrame
predictions_df = h2o.as_list(predictions_h2o)

# Ensuring predictions are in the correct format (0 or 1)
predictions_df['predict'] = predictions_df['predict'].astype(int)

# Evaluation
accuracy = accuracy_score(test[target_variable], predictions_df['predict'])
print(f'Accuracy: {accuracy}')

# classification report
print(classification_report(test[target_variable], predictions_df['predict']))

# Shut down H2O
h2o.shutdown()


Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.21" 2023-10-17; OpenJDK Runtime Environment (build 11.0.21+9-post-Ubuntu-0ubuntu122.04); OpenJDK 64-Bit Server VM (build 11.0.21+9-post-Ubuntu-0ubuntu122.04, mixed mode, sharing)
  Starting server from /usr/local/lib/python3.10/dist-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmp7pbenei6
  JVM stdout: /tmp/tmp7pbenei6/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmp7pbenei6/h2o_unknownUser_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,03 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.3
H2O_cluster_version_age:,1 month and 21 days
H2O_cluster_name:,H2O_from_python_unknownUser_uv8t48
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.170 Gb
H2O_cluster_total_cores:,2
H2O_cluster_allowed_cores:,2


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |
07:31:59.118: _train param, Dropping bad and constant columns: [patient_F]
07:31:59.118: _response param, We have detected that your response column has only 2 unique values (0/1). If you wish to train a binary model instead of a regression model, convert your target column to categorical before training.

█████
07:32:40.531: _train param, Dropping bad and constant columns: [patient_F]
07:32:40.531: _response param, We have detected that your response column has only 2 unique values (0/1). If you wish to train a binary model instead of a regression model, convert your target column to categorical before training.

█
07:32:48.526: _train param, Dropping bad and constant columns: [patient_F]
07:32:48.526: _response param, We have detected that your response column has only 2 unique 

In [1]:
# Keras

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from tensorflow import keras
from tensorflow.keras import layers

df = pd.read_csv('final2.csv')

target_variable = 'DiagPeriodL90D'

train, test = train_test_split(df, test_size=0.2, random_state=42)

#  predictors
predictors = list(df.columns[df.columns != target_variable])

X_train = train[predictors].values
y_train = train[target_variable].values
X_test = test[predictors].values
y_test = test[target_variable].values

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keras model
model = keras.Sequential([
    layers.Input(shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# predictions
predictions = model.predict(X_test)
predictions_binary = (predictions > 0.5).astype(int)

# Evaluation
accuracy = accuracy_score(y_test, predictions_binary)
print(f'Accuracy: {accuracy}')

# classification report
print(classification_report(y_test, predictions_binary))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy: 0.3737412858249419
              precision    recall  f1-score   support

           0       0.37      1.00      0.54       965
           1       0.00      0.00      0.00      1617

    accuracy                           0.37      2582
   macro avg       0.19      0.50      0.27      2582
weighted avg       0.14      0.37      0.20      2582



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
