In [1]:
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import h2o
from h2o.automl import H2OAutoML
import json
import requests

%matplotlib inline

In [2]:
#01388500 - Pompton River at Route 23, Pompton Plains NJ

In [3]:
# Get the USGS water data as JSON
usgs_water_api = "http://waterservices.usgs.gov/nwis/iv?sites=01388500&period=P7D&format=json"
api_response = requests.get(usgs_water_api)
water_data = api_response.json()

# Extract interesting data from the JSON
site_name = water_data["value"]["timeSeries"][0]["sourceInfo"]["siteName"]
station_id = water_data["value"]["timeSeries"][0]["sourceInfo"]["siteCode"][0]["value"]
agency_code = water_data["value"]["timeSeries"][0]["sourceInfo"]["siteCode"][0]["agencyCode"]

date_time_last = water_data["value"]["timeSeries"][1]["values"][0]["value"][-3]["dateTime"]
date_time_mid = water_data["value"]["timeSeries"][1]["values"][0]["value"][-2]["dateTime"]
date_time_current = water_data["value"]["timeSeries"][1]["values"][0]["value"][-1]["dateTime"]

streamflow_last = water_data["value"]["timeSeries"][0]["values"][0]["value"][-3]["value"]
streamflow_mid = water_data["value"]["timeSeries"][0]["values"][0]["value"][-2]["value"]
streamflow_current = water_data["value"]["timeSeries"][0]["values"][0]["value"][-1]["value"]

gage_height_last = water_data["value"]["timeSeries"][1]["values"][0]["value"][-3]["value"]
gage_height_mid = water_data["value"]["timeSeries"][1]["values"][0]["value"][-2]["value"]
gage_height_current = water_data["value"]["timeSeries"][1]["values"][0]["value"][-1]["value"]


# Print the data
print(f"Site name: {site_name}")

print(f"Current Datetime: {date_time_current}")
print(f"Last Datetime: {date_time_last}")

print(f"Station ID: {station_id}")
print(f"Agency code: {agency_code}")

print(f"Current Streamflow (ft3/s): {streamflow_last}")
print(f"Last Streamflow (ft3/s): {streamflow_current}")

print(f"Current Gage height (ft): {gage_height_last}")
print(f"Last Gage height (ft): {gage_height_current}")


Site name: Pompton River at Pompton Plains NJ
Current Datetime: 2024-08-14T14:45:00.000-04:00
Last Datetime: 2024-08-14T14:15:00.000-04:00
Station ID: 01388500
Agency code: USGS
Current Streamflow (ft3/s): 411
Last Streamflow (ft3/s): 406
Current Gage height (ft): 8.81
Last Gage height (ft): 8.80


In [4]:
realtime_score = [{'datetime': date_time_last, 'Gage': gage_height_last, 'Flow': streamflow_last},
                  {'datetime': date_time_mid, 'Gage': gage_height_mid, 'Flow': streamflow_mid },
                   {'datetime': date_time_current, 'Gage': gage_height_current, 'Flow': streamflow_current }]

In [5]:
df = pd.DataFrame(realtime_score)
df = df.set_index('datetime')

In [6]:
df['Gage'] = df['Gage'].astype(float)
df['Flow'] = df['Flow'].astype(float)

In [7]:
df['Gage_diff'] = df['Gage'].shift(1) - df['Gage']
df['Flow_diff'] = df['Flow'].shift(1) - df['Flow']

In [8]:
df.dtypes

Gage         float64
Flow         float64
Gage_diff    float64
Flow_diff    float64
dtype: object

In [9]:
print('number of rows = ', df.shape[0])

number of rows =  3


In [10]:
df = df.fillna(0)

In [11]:
df

Unnamed: 0_level_0,Gage,Flow,Gage_diff,Flow_diff
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-08-14T14:15:00.000-04:00,8.81,411.0,0.0,0.0
2024-08-14T14:30:00.000-04:00,8.8,406.0,0.01,5.0
2024-08-14T14:45:00.000-04:00,8.8,406.0,0.0,0.0


In [12]:
df = df[-1:]

In [13]:
print ("Will score this row, ", df)

Will score this row,                                 Gage   Flow  Gage_diff  Flow_diff
datetime                                                        
2024-08-14T14:45:00.000-04:00   8.8  406.0        0.0        0.0


In [14]:
h2o.init(nthreads=-1)

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: java version "1.8.0_411"; Java(TM) SE Runtime Environment (build 1.8.0_411-b09); Java HotSpot(TM) 64-Bit Server VM (build 25.411-b09, mixed mode)
  Starting server from /Users/tott/opt/anaconda3/envs/py310/lib/python3.10/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /var/folders/39/8gc7tmt964lf1zccbqy781gm0000gp/T/tmpo8nw8fw4
  JVM stdout: /var/folders/39/8gc7tmt964lf1zccbqy781gm0000gp/T/tmpo8nw8fw4/h2o_tott_started_from_python.out
  JVM stderr: /var/folders/39/8gc7tmt964lf1zccbqy781gm0000gp/T/tmpo8nw8fw4/h2o_tott_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,04 secs
H2O_cluster_timezone:,America/Los_Angeles
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.3
H2O_cluster_version_age:,7 months and 24 days
H2O_cluster_name:,H2O_from_python_tott_danq6z
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.542 Gb
H2O_cluster_total_cores:,10
H2O_cluster_allowed_cores:,10


In [15]:
score = h2o.H2OFrame(df)

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [16]:
#score['Gage'] = score['Gage'].isnumeric()
#score['Flow'] = score['Flow'].isnumeric()
score['Gage_diff'] = score['Gage_diff'].ascharacter().asnumeric()
score['Flow_diff'] = score['Flow_diff'].ascharacter().asnumeric()

In [17]:
score.describe()

Unnamed: 0,Gage,Flow,Gage_diff,Flow_diff
type,real,int,int,int
mins,8.8,406.0,0.0,0.0
mean,8.8,406.0,0.0,0.0
maxs,8.8,406.0,0.0,0.0
sigma,0.0,0.0,0.0,0.0
zeros,0,0,1,1
missing,0,0,0,0
0,8.8,406.0,0.0,0.0


In [18]:
print(score.columns)

['Gage', 'Flow', 'Gage_diff', 'Flow_diff']


In [19]:
model_path_12 = 'model/12_time_units/GLM_1_AutoML_1_20240814_114058'
saved_model = h2o.load_model(model_path_12)
predict = saved_model.predict(score)
print(f"1 Hour Gage height forecast (ft): {predict}")

glm prediction progress: |███████████████████████████████████████████████████████| (done) 100%
1 Hour Gage height forecast (ft):   predict
  8.80482
[1 row x 1 column]

