In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import h2o
from h2o.automl import H2OAutoML
from sklearn.model_selection import train_test_split

# Initialize H2O (h2o.init() connects to the H2O backend used for training and prediction below)
h2o.init()

# Read data
data = pd.read_csv("Cleaned Global Eco Inc.csv")

# One-hot encode the country column; drop_first=True removes the first country's
# indicator so it becomes the implicit baseline (all Country_* columns equal to 0).
# dtype=int pins the indicators to numeric 0/1: pandas >= 2.0 would otherwise emit
# bool columns, which would not match the numeric 1 that the prediction input
# further down assigns to the selected country's indicator.
data = pd.get_dummies(data, columns=['Country'], drop_first=True, dtype=int)

# 'Unnamed: 0' is presumably an index column saved into the CSV — TODO confirm.
data = data.drop('Unnamed: 0', axis=1)

# Replace infinite values with NaN, then drop every row that contains any NaN
data = data.replace([np.inf, -np.inf], np.nan).dropna()

# Target column and predictor matrix
target = 'GDP Growth Rate (%)'
y = data[target]
X = data.drop(columns=[target])

# Hold out 30% of the rows for evaluation; random_state fixes the shuffle so the
# split is repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)
features = X_train.columns.tolist()

# Upload each split to H2O with the target appended as the last column
train = h2o.H2OFrame(pd.concat([X_train, y_train], axis=1))
test = h2o.H2OFrame(pd.concat([X_test, y_test], axis=1))

# Fit AutoML: at most 20 models, 5-fold cross-validation, fixed seed.
# NOTE(review): if this file is executed as a Streamlit script, this section is
# re-run on every widget interaction; consider caching the trained model — confirm
# how the app is deployed before changing this.
aml = H2OAutoML(max_models=20, seed=1, nfolds=5)
aml.train(x=features, y=target, training_frame=train)

# Streamlit UI
st.title("GDP Growth Rate Predictor")
st.write("This app uses H2O AutoML to predict the GDP growth rate for selected country and year.")

# Country options are recovered from the one-hot indicator column names.
# NOTE(review): the encoding uses drop_first=True, so the first (baseline) country
# has no Country_* column and therefore cannot be selected here.
country_prefix = "Country_"
# Strip only the leading prefix (str.replace would also remove later occurrences).
country_options = [col[len(country_prefix):] for col in data.columns if col.startswith(country_prefix)]
selected_country = st.selectbox("Select a Country", country_options)
selected_country_column = f'{country_prefix}{selected_country}'

# Year options
min_year, max_year = 1960, 2020
selected_year = st.slider("Select a Year", min_year, max_year, max_year)

# Column means serve as the editable defaults for the remaining features
default_values = data.mean().to_dict()

# Build the single prediction row, one entry per training feature:
#   * 'Year' comes from the slider,
#   * every Country_* indicator is set explicitly: 1 for the selected country and 0
#     for all others (previously the unselected indicators kept their column mean,
#     which described a blend of countries rather than the selected one),
#   * all other features are editable number inputs pre-filled with the column mean.
# Year and the indicators no longer get their own number inputs, so they cannot
# contradict the slider / selectbox above.
input_data = {}
for feature in features:
    if feature == 'Year':
        input_data[feature] = selected_year
    elif feature.startswith(country_prefix):
        # Cast to the training column's dtype so the indicator has the same type
        # in the prediction row as in the data the model was trained on.
        input_data[feature] = data[feature].dtype.type(feature == selected_country_column)
    else:
        input_data[feature] = st.number_input(feature, value=float(default_values.get(feature, 0)))

# One-row frame with columns in the same order as the training features
input_df = pd.DataFrame([input_data], columns=features)

# Predict GDP Growth Rate
if st.button("Predict GDP Growth Rate"):
    # Upload the row to H2O only when a prediction is requested, instead of on
    # every execution of the script.
    input_h2o = h2o.H2OFrame(input_df)
    preds = aml.predict(input_h2o)
    predicted_value = preds.as_data_frame().iloc[0, 0]
    st.write(f"Predicted GDP Growth Rate for {selected_country} in {selected_year}: **{predicted_value:.2f}%**")

# Visualization
if st.button("Show Actual vs Predicted"):
    # Score the held-out test frame and bring the predictions back into pandas
    test_predictions = aml.predict(test).as_data_frame()

    # Reset the index of the true values so they line up positionally with the
    # predictions, then place both side by side under readable column names.
    actual_values = y_test.reset_index(drop=True)
    comparison = pd.concat([actual_values, test_predictions], axis=1)
    comparison.columns = ['Actual', 'Predicted']

    # Visualization: Plot of predicted vs actual GDP Growth Rate
    st.write("### Actual vs Predicted GDP Growth Rate")
    st.line_chart(comparison)




Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O_cluster_uptime:,1 min 58 secs
H2O_cluster_timezone:,America/Los_Angeles
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.1
H2O_cluster_version_age:,1 month and 24 days
H2O_cluster_name:,H2O_from_python_musha_40mmfr
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.900 Gb
H2O_cluster_total_cores:,32
H2O_cluster_allowed_cores:,32


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |█
11:38:49.969: AutoML: XGBoost is not available; skipping it.

█████████████████████████████████████████████████████████████Job request failed Unexpected HTTP error: HTTPConnectionPool(host='localhost', port=54321): Max retries exceeded with url: /3/Jobs/$03017f00000132d4ffffffff$_8926bff1a572a9406fca872957c84a45 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000174F19F36D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it')), will retry after 3s.
Job request failed Unexpected HTTP error: HTTPConnectionPool(host='localhost', port=54321): Max retries exceeded with url: /3/Jobs/$03017f00000132d4ffffffff$_8926bff1a572a9406fca872957c84a45 (Caused by NewConnectionError('<u

Job request failed Unexpected HTTP error: HTTPConnectionPool(host='localhost', port=54321): Max retries exceeded with url: /3/Jobs/$03017f00000132d4ffffffff$_8926bff1a572a9406fca872957c84a45 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000174F1A8BBD0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it')), will retry after 3s.
Job request failed Unexpected HTTP error: HTTPConnectionPool(host='localhost', port=54321): Max retries exceeded with url: /3/Jobs/$03017f00000132d4ffffffff$_8926bff1a572a9406fca872957c84a45 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000174F15CA6D0>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it')), will retry after 3s.
Job request failed Unexpected HTTP error: HTTPConnectionPool(host='localhost', port=54321): Max retries exceeded w

H2OConnectionError: Unexpected HTTP error: HTTPConnectionPool(host='localhost', port=54321): Max retries exceeded with url: /3/Jobs/$03017f00000132d4ffffffff$_8926bff1a572a9406fca872957c84a45 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000174F152BC50>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

In [2]:
# Shut down the H2O cluster. Run this only after you are done with the model
# above: predictions are served by the cluster, so they stop working once it is
# shut down. Nothing here hooks into Streamlit's lifecycle — the shutdown happens
# whenever this cell/statement is executed.
# h2o.cluster() returns None when there is no active connection, so guard the
# call instead of failing with AttributeError.
active_cluster = h2o.cluster()
if active_cluster is not None:
    active_cluster.shutdown(prompt=False)