In [1]:
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import h2o
from h2o.automl import H2OAutoML
import json
import requests

%matplotlib inline

In [2]:
#01473730 - Schuylkill River at Conshohocken, PA

In [3]:
# Get the USGS water data as JSON
usgs_water_api = "http://waterservices.usgs.gov/nwis/iv?sites=01473730&period=P7D&format=json"
api_response = requests.get(usgs_water_api)
water_data = api_response.json()

In [4]:
water_data

{'name': 'ns1:timeSeriesResponseType',
 'declaredType': 'org.cuahsi.waterml.TimeSeriesResponseType',
 'scope': 'javax.xml.bind.JAXBElement$GlobalScope',
 'value': {'queryInfo': {'queryURL': 'http://waterservices.usgs.gov/nwis/ivsites=01473730&period=P7D&format=json',
   'criteria': {'locationParam': '[ALL:01473730]',
    'variableParam': 'ALL',
    'parameter': []},
   'note': [{'value': '[ALL:01473730]', 'title': 'filter:sites'},
    {'value': '[mode=PERIOD, period=P7D, modifiedSince=null]',
     'title': 'filter:timeRange'},
    {'value': 'methodIds=[ALL]', 'title': 'filter:methodId'},
    {'value': '2024-09-04T19:48:05.582Z', 'title': 'requestDT'},
    {'value': '9a5908c0-6af6-11ef-9859-005056beda50', 'title': 'requestId'},
    {'value': 'Provisional data are subject to revision. Go to http://waterdata.usgs.gov/nwis/help/?provisional for more information.',
     'title': 'disclaimer'},
    {'value': 'caas01', 'title': 'server'}]},
  'timeSeries': [{'sourceInfo': {'siteName': 'Schuyl

In [3]:
# Extract interesting data from the JSON
site_name = water_data["value"]["timeSeries"][0]["sourceInfo"]["siteName"]
station_id = water_data["value"]["timeSeries"][0]["sourceInfo"]["siteCode"][0]["value"]
agency_code = water_data["value"]["timeSeries"][0]["sourceInfo"]["siteCode"][0]["agencyCode"]

date_time_last = water_data["value"]["timeSeries"][1]["values"][0]["value"][-3]["dateTime"]
date_time_mid = water_data["value"]["timeSeries"][1]["values"][0]["value"][-2]["dateTime"]
date_time_current = water_data["value"]["timeSeries"][1]["values"][0]["value"][-1]["dateTime"]

streamflow_last = water_data["value"]["timeSeries"][0]["values"][0]["value"][-3]["value"]
streamflow_mid = water_data["value"]["timeSeries"][0]["values"][0]["value"][-2]["value"]
streamflow_current = water_data["value"]["timeSeries"][0]["values"][0]["value"][-1]["value"]

gage_height_last = water_data["value"]["timeSeries"][1]["values"][0]["value"][-3]["value"]
gage_height_mid = water_data["value"]["timeSeries"][1]["values"][0]["value"][-2]["value"]
gage_height_current = water_data["value"]["timeSeries"][1]["values"][0]["value"][-1]["value"]


# Print the data
print(f"Site name: {site_name}")

print(f"Current Datetime: {date_time_current}")
print(f"Last Datetime: {date_time_last}")

print(f"Station ID: {station_id}")
print(f"Agency code: {agency_code}")

print(f"Current Streamflow (ft3/s): {streamflow_last}")
print(f"Last Streamflow (ft3/s): {streamflow_current}")

print(f"Current Gage height (ft): {gage_height_last}")
print(f"Last Gage height (ft): {gage_height_current}")


Site name: Schuylkill River at Conshohocken, PA
Current Datetime: 2024-08-28T10:30:00.000-04:00
Last Datetime: 2024-08-28T10:00:00.000-04:00
Station ID: 01473730
Agency code: USGS
Current Streamflow (ft3/s): 718
Last Streamflow (ft3/s): 707
Current Gage height (ft): 6.68
Last Gage height (ft): 6.67


In [4]:
realtime_score = [{'datetime': date_time_last, 'Gage': gage_height_last, 'Flow': streamflow_last},
                  {'datetime': date_time_mid, 'Gage': gage_height_mid, 'Flow': streamflow_mid },
                   {'datetime': date_time_current, 'Gage': gage_height_current, 'Flow': streamflow_current }]

In [5]:
df = pd.DataFrame(realtime_score)
df = df.set_index('datetime')

In [6]:
df['Gage'] = df['Gage'].astype(float)
df['Flow'] = df['Flow'].astype(float)

In [7]:
df['Gage_diff'] = df['Gage'].shift(1) - df['Gage']
df['Flow_diff'] = df['Flow'].shift(1) - df['Flow']

In [8]:
df.dtypes

Gage         float64
Flow         float64
Gage_diff    float64
Flow_diff    float64
dtype: object

In [9]:
print('number of rows = ', df.shape[0])

number of rows =  3


In [10]:
df = df.fillna(0)

In [11]:
df

Unnamed: 0_level_0,Gage,Flow,Gage_diff,Flow_diff
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-08-28T10:00:00.000-04:00,6.68,718.0,0.0,0.0
2024-08-28T10:15:00.000-04:00,6.68,718.0,0.0,0.0
2024-08-28T10:30:00.000-04:00,6.67,707.0,0.01,11.0


In [12]:
df = df[-1:]

In [13]:
print ("Will score this row, ", df)

Will score this row,                                 Gage   Flow  Gage_diff  Flow_diff
datetime                                                        
2024-08-28T10:30:00.000-04:00  6.67  707.0       0.01       11.0


In [20]:
h2o.init(nthreads=-1)

Checking whether there is an H2O instance running at http://localhost:54321. connected.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,50 secs
H2O_cluster_timezone:,America/New_York
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.3
H2O_cluster_version_age:,8 months and 7 days
H2O_cluster_name:,H2O_from_python_tott_9pe9g0
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.540 Gb
H2O_cluster_total_cores:,10
H2O_cluster_allowed_cores:,10


In [21]:
score = h2o.H2OFrame(df)

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [22]:
#score['Gage'] = score['Gage'].isnumeric()
#score['Flow'] = score['Flow'].isnumeric()
score['Gage_diff'] = score['Gage_diff'].ascharacter().asnumeric()
score['Flow_diff'] = score['Flow_diff'].ascharacter().asnumeric()

In [23]:
score.describe()

Unnamed: 0,Gage,Flow,Gage_diff,Flow_diff
type,real,int,int,int
mins,6.67,707.0,,11.0
mean,6.67,707.0,0.0,11.0
maxs,6.67,707.0,,11.0
sigma,0.0,0.0,-0.0,0.0
zeros,0,0,0,0
missing,0,0,1,0
0,6.67,707.0,,11.0


In [24]:
print(score.columns)

['Gage', 'Flow', 'Gage_diff', 'Flow_diff']


In [25]:
model_path_4 = 'model/4_time_units/GBM_grid_1_AutoML_1_20240812_91917_model_176'
saved_model = h2o.load_model(model_path_4)
predict = saved_model.predict(score)
print(f"1 Hour Gage height forecast (ft): {predict}")

H2OResponseError: Server error java.lang.IllegalArgumentException:
  Error: Found version 3.46.0.4, but running version 3.44.0.3

For more information visit:
  https://github.com/h2oai/h2o-3/discussions/15523
  Request: POST /99/Models.bin/
    data: {'dir': 'model/4_time_units/GBM_grid_1_AutoML_1_20240812_91917_model_176'}


In [None]:
model_path_12 = 'model/12_time_units/GBM_grid_1_AutoML_1_20240812_95257_model_138'
saved_model = h2o.load_model(model_path_12)
predict = saved_model.predict(score)
print(f"3 Hour Gage height forecast (ft): {predict}")

In [None]:
model_path_24 = 'model/24_time_units/DeepLearning_grid_1_AutoML_1_20240812_110712_model_2'
saved_model = h2o.load_model(model_path_24)
predict = saved_model.predict(score)
print(f"6 Hour Gage height forecast (ft): {predict}")

In [None]:
model_path_48 = 'model/48_time_units/DeepLearning_grid_1_AutoML_1_20240812_101810_model_1'
saved_model = h2o.load_model(model_path_48)
predict = saved_model.predict(score)
print(f"12 Hour Gage height forecast (ft): {predict}")

In [None]:
model_path_96 = 'model/96_time_units/DeepLearning_grid_1_AutoML_1_20240812_104345_model_1'
saved_model = h2o.load_model(model_path_96)
predict = saved_model.predict(score)
print(f"24 Hour Gage height forecast (ft): {predict}")

In [None]:
h2o.cluster().shutdown()