In [1]:
import streamlit as st

import numpy as np
import pandas as pd

import time

import h2o
from h2o.automl import H2OAutoML

In [2]:
# Tunable settings for the AutoML run; every value below is forwarded to
# H2OAutoML in the training cell.
max_models = 10           # forwarded as H2OAutoML(max_models=...)
stopping_metric = 'AUC'   # forwarded as H2OAutoML(stopping_metric=...)
nfolds = 5                # forwarded as H2OAutoML(nfolds=...)
seed = 1234               # forwarded as H2OAutoML(seed=...)

In [3]:
# Training-data upload.
# `data` is bound to None up front so it always exists, even when no CSV has
# been uploaded yet; it is replaced by the parsed DataFrame once a file arrives.
data = None

# A dedicated name is used for this uploader so it is not confused with the
# prediction-file uploader further down, which assigns `uploaded_file`.
training_file = st.file_uploader("Choose a CSV file to train the Lead Scoring model", type="csv")
st.write('Note, you should remove ID values from the training set.')

if training_file is not None:
    # The spinner wraps the actual work (parsing the CSV) instead of a fixed
    # sleep: Streamlit re-executes the script on every widget interaction, so
    # an unconditional delay here would be paid on each rerun.
    with st.spinner('Wait for it...'):
        data = pd.read_csv(training_file)
    st.success('Done!')
    st.write(data)

In [4]:
# Name of the response column; the text box is pre-filled with 'Converted'.
target = st.text_input(
    label='Select the Target Variable (note: this is case sensitive)',
    value='Converted',
)

# Second uploader: the CSV whose rows will be scored by the trained model.
uploaded_file = st.file_uploader(
    label="Choose a CSV file to make predictions on the model",
    type="csv",
)


In [5]:
# Prediction-data preview.
# `predict` is bound to None up front so it always exists, even when no
# prediction CSV has been uploaded yet.
predict = None

if uploaded_file is not None:
    load = pd.read_csv(uploaded_file)
    # Cap the sample size at the number of available rows: DataFrame.sample
    # raises ValueError when asked for more rows than exist (without replacement).
    # A fixed random_state keeps the same rows selected across Streamlit reruns,
    # so the rows shown before clicking the button are the rows that get scored.
    predict = load.sample(n=min(5, len(load)), random_state=seed)
    st.write("We'll do a random sample of 5 rows", predict)


In [6]:
# Train an AutoML model on the uploaded training data, report the leader's
# test-set metrics and variable importance, then score the sampled prediction rows.
if st.button('Kick off Training & Predictions'):
    # `data` and `predict` are only populated once the matching CSV has been
    # uploaded, so look them up defensively instead of failing with a NameError.
    training_data = globals().get('data')
    prediction_sample = globals().get('predict')

    if training_data is None:
        st.error('Please upload a training CSV file before kicking off training.')
    elif prediction_sample is None:
        st.error('Please upload a CSV file to make predictions on before kicking off training.')
    elif target not in training_data.columns:
        # The target is free text and case sensitive; catch typos before
        # starting a cluster.
        st.error("Target variable '" + str(target) + "' was not found in the training data.")
    else:
        st.write('Intializing Training Cluster...')
        # Initialize H2O cluster
        h2o.init()

        try:
            st.write('Loading Data into Model...')

            # 70/30 train/test split; seeded so reruns produce the same split.
            full_frame = h2o.H2OFrame(training_data)
            train, test = full_frame.split_frame(ratios=[.7], seed=seed)

            # Identify predictors and response
            y = target
            x = [column for column in train.columns if column != y]

            # Convert the response to a factor so AutoML treats this as classification.
            train[y] = train[y].asfactor()
            test[y] = test[y].asfactor()

            # Run AutoML for up to `max_models` base models
            with st.spinner('Wait for it...'):
                aml = H2OAutoML(
                    max_models=max_models,
                    seed=seed,
                    nfolds=nfolds,
                    stopping_metric=stopping_metric,
                    exclude_algos=["StackedEnsemble", "DeepLearning", "DRF"],
                )
                aml.train(x=x, y=y, training_frame=train)
                st.success('Done!')

            # Score the leader on the held-out split once and reuse the result
            # for both metrics.
            leader_performance = aml.leader.model_performance(test)
            st.write("The best model accuracy for the model (AUC) is:", str(leader_performance.auc()))
            st.write("The best model accuracy for the model (F1) is:", str(leader_performance.F1()))

            # Variable importance comes from the same leader model whose metrics
            # are reported above, not from a fixed leaderboard row.
            FI = aml.leader.varimp(use_pandas=True)
            st.write("Important Features", FI)

            # Get predictions for the sampled rows
            st.write('Intializing Prediction Cluster...')
            predict_frame = h2o.H2OFrame(prediction_sample)
            preds = aml.predict(predict_frame)
            st.write(preds)

            # Both frames come back with a fresh positional index, so an index
            # join lines each prediction up with the row it was made for.
            out = pd.merge(
                preds.as_data_frame(),
                predict_frame.as_data_frame(),
                left_index=True,
                right_index=True,
            )
            st.write('Here are the predictions with probabilities:', out)
        finally:
            # Always release the cluster, even if training or scoring raised.
            st.write('Shutting down Training and Prediction Clusters...')
            h2o.cluster().shutdown()

        st.write("Thank you and Goodbye.")