In [1]:
from xgboost import XGBRegressor
from datetime import timedelta
from xgboost import callback
from typing import Tuple
from pathlib import Path
import pandas as pd
import pickle

In [2]:
# Lift pandas' display truncation so wide/long frames render in full.
# set_option accepts alternating (option, value) pairs in a single call.
pd.set_option(
    'display.max_columns', None,
    'display.max_rows', None,
)

In [3]:
# Load the curated water-consumption dataset from parquet.
# The path is relative, so it resolves against the current working directory.
df_path = Path('../data/curated_data/water_consumption_curated_2.parquet')
df = pd.read_parquet(df_path)
# Preview the first rows as the cell's displayed output.
df.head()

Unnamed: 0,id,timestamp,flow_in_(l/s),reservoir_level_(%),pressure_(mca),gmb_1_is_on,gmb_2_is_on,reservoir_level_liters,time_passed_seconds,liters_should_have_entered,liters_entered,liters_out,flow_out_(l/s),year,month,week_of_year,day_of_week,day,hour,second,flow_out_forecast
1,1,2023-03-17 12:28:56,66.05,35.86,38.2,0,1,358600.0,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,56,36.827347
2,2,2023-03-17 12:31:26,65.64,36.16,38.06,0,1,361600.0,150.0,9846.0,3000.0,6846.0,45.64,2023,3,11,4,17,12,26,45.883347
3,3,2023-03-17 12:33:56,65.64,36.5,38.03,0,1,365000.0,150.0,9846.0,3400.0,6446.0,42.97,2023,3,11,4,17,12,56,42.028744
4,4,2023-03-17 12:36:26,65.64,36.8,38.17,0,1,368000.0,150.0,9846.0,3000.0,6846.0,45.64,2023,3,11,4,17,12,26,45.883347
5,5,2023-03-17 12:38:56,65.24,36.8,38.17,0,1,368000.0,150.0,9786.0,0.0,9786.0,65.24,2023,3,11,4,17,12,56,64.437569


In [4]:
# Load the pickled flow-out forecasting model.
# SECURITY NOTE: unpickling can execute arbitrary code, so only load model
# files that come from a trusted source.
# The context manager guarantees the file handle is closed after loading
# (the previous bare open() call leaked it).
with open('../models/xgb_flow_out_forecast_2.pkl', 'rb') as model_file:
    forecasting_model = pickle.load(model_file)

In [16]:
def seconds_to_hms(seconds):
    """Split a duration in seconds into (hours, minutes, seconds).

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        Tuple ``(hours, minutes, seconds)`` where ``hours`` is the number of
        whole 3600-second blocks, ``minutes`` is the number of whole
        60-second blocks left inside the last hour, and ``seconds`` is the
        remainder modulo 60. Hours are not wrapped at 24.
    """
    hour_part, within_hour = divmod(seconds, 3600)
    minute_part = within_hour // 60
    second_part = seconds % 60
    return hour_part, minute_part, second_part

def correct_dtypes(df):
    """Return a copy of ``df`` with the model feature columns cast to fixed dtypes.

    Args:
        df: Frame containing every column listed in the dtype table below.
            The input object itself is not modified.

    Returns:
        A copy of ``df`` in which each listed column has been cast with
        ``astype``; columns not listed are carried over unchanged.
    """
    # Target dtype per column, applied in this order.
    target_dtypes = {
        'flow_in_(l/s)': float,
        'reservoir_level_(%)': float,
        'pressure_(mca)': float,
        'gmb_1_is_on': int,
        'gmb_2_is_on': int,
        'reservoir_level_liters': float,
        'time_passed_seconds': float,
        'liters_entered': float,
        'year': 'int32',
        'month': 'int32',
        'week_of_year': 'UInt32',  # pandas nullable unsigned integer dtype
        'day_of_week': 'int32',
        'day': 'int32',
        'hour': 'int32',
        'second': 'int32',
    }

    typed = df.copy()  # work on a copy so the caller's object is left untouched
    for column, dtype in target_dtypes.items():
        typed[column] = typed[column].astype(dtype)
    return typed

def create_new_row(row, time_step):
    """Copy ``row`` and move its timestamp forward by ``time_step`` seconds.

    Args:
        row: Record with at least a ``'timestamp'`` entry that
            ``pd.to_datetime`` can parse.
        time_step: Number of seconds to add to the timestamp.

    Returns:
        A copy of ``row`` whose ``'timestamp'`` is advanced and whose
        calendar fields (``year``, ``month``, ``week_of_year``,
        ``day_of_week``, ``day``, ``hour``, ``minute``, ``second``) are
        recomputed from the new timestamp. All other entries are carried
        over unchanged from ``row``.
    """
    advanced = pd.to_datetime(row['timestamp']) + timedelta(seconds=time_step)

    shifted = row.copy()
    shifted['timestamp'] = advanced

    # Calendar features derived from the advanced timestamp.
    calendar_fields = {
        'year': advanced.year,
        'month': advanced.month,
        'week_of_year': advanced.isocalendar()[1],  # ISO week number
        'day_of_week': advanced.weekday(),          # Monday == 0
        'day': advanced.day,
        'hour': advanced.hour,
        'minute': advanced.minute,
        'second': advanced.second,
    }
    for field, value in calendar_fields.items():
        shifted[field] = value
    return shifted

def predict_flow_out_forecast(row, model):
    """Predict ``flow_out_forecast`` for one record or a frame of records.

    Args:
        row: Either a ``pd.Series`` holding a single record or a
            ``pd.DataFrame`` of records. It must contain every feature
            column listed below.
        model: Fitted estimator exposing ``predict``.

    Returns:
        For a ``pd.DataFrame`` input, the same frame object with a
        ``'flow_out_forecast'`` column assigned (the input is modified in
        place). For a ``pd.Series`` input, a copy of the series with a
        scalar ``'flow_out_forecast'`` entry added; the caller's series is
        left untouched.
    """
    feature_columns = [
        "flow_in_(l/s)", "reservoir_level_(%)", "pressure_(mca)", "gmb_1_is_on",
        "gmb_2_is_on", "reservoir_level_liters", "time_passed_seconds", "liters_entered",
        "year", "month", "week_of_year", "day_of_week", "day", "hour", "second"
    ]

    if isinstance(row, pd.Series):
        # Selecting from a Series yields another 1-D Series, which neither
        # correct_dtypes (column-wise astype) nor model.predict (2-D input)
        # can consume. Reshape it into a single-row frame first.
        row = row.copy()
        X = correct_dtypes(row[feature_columns].to_frame().T)
        row['flow_out_forecast'] = model.predict(X)[0]
        return row

    # Ensure all feature columns are numeric
    X = correct_dtypes(row[feature_columns])

    # Predict flow_out_forecast for every record in the frame
    row['flow_out_forecast'] = model.predict(X)
    return row

def simulate_emptying(
    row: pd.Series,
    model: XGBRegressor = None,
    time_step: int = 150,
    num_steps: int = 10,
    capacity_liters: float = 1_000_000,
):
    """Simulate one time step of the reservoir draining with no inflow.

    A new record is created ``time_step`` seconds after ``row``, the model
    forecasts the outflow for it, and the reservoir volume is reduced by the
    forecast outflow over that interval. Pump flags and inflow fields of the
    new record are set to zero.

    Args:
        row: Starting record. It is not modified.
        model: Fitted estimator exposing ``predict``. Required, despite the
            ``None`` default kept for signature compatibility.
        time_step: Length of the simulated interval in seconds.
        num_steps: Currently unused; kept for interface compatibility.
        capacity_liters: Reservoir volume corresponding to 100 %, used to
            convert liters back to a percentage level.

    Returns:
        Tuple ``(row, next_row)``: the untouched input record and the
        simulated record.

    Raises:
        ValueError: If ``model`` is ``None``.
    """
    if model is None:
        raise ValueError("simulate_emptying requires a fitted model; got model=None")

    feature_columns = [
        "flow_in_(l/s)", "reservoir_level_(%)", "pressure_(mca)", "gmb_1_is_on",
        "gmb_2_is_on", "reservoir_level_liters", "time_passed_seconds", "liters_entered",
        "year", "month", "week_of_year", "day_of_week", "day", "hour", "second"
    ]

    next_row = create_new_row(row, time_step)
    # The simulated interval lasts exactly `time_step` seconds. Without this,
    # the value copied from `row` (the length of the *previous* interval)
    # would be used both as a model feature and to compute liters_out.
    next_row['time_passed_seconds'] = float(time_step)

    next_row_frame = correct_dtypes(next_row.to_frame().T)
    X = next_row_frame[feature_columns]
    # NOTE(review): the features still carry the previous record's pump
    # states and inflow; they are zeroed only after the prediction. Confirm
    # this is the intended model input.

    # Forecast the outflow and treat it as the actual outflow for this step.
    next_row['flow_out_forecast'] = model.predict(X)[0]
    next_row['flow_out_(l/s)'] = next_row['flow_out_forecast']
    next_row['liters_out'] = next_row['flow_out_(l/s)'] * next_row['time_passed_seconds']
    next_row['reservoir_level_liters'] = row['reservoir_level_liters'] - next_row['liters_out']

    # Emptying scenario: both pumps off, nothing flows in.
    next_row['gmb_1_is_on'] = 0
    next_row['gmb_2_is_on'] = 0
    next_row['flow_in_(l/s)'] = 0
    next_row['liters_should_have_entered'] = 0
    next_row['liters_entered'] = 0
    next_row['reservoir_level_(%)'] = next_row['reservoir_level_liters'] / capacity_liters * 100

    return row, next_row

In [17]:
# Simulate a single emptying step starting from the record at positional index 10.
row, next_row = simulate_emptying(df.iloc[10], forecasting_model)
# Print the starting record and the simulated record, separated by a dashed rule.
print(row, '-' * 100, next_row, sep='\n')

id                                             11
timestamp                     2023-03-17 12:57:58
flow_in_(l/s)                                67.3
reservoir_level_(%)                          38.7
pressure_(mca)                              37.97
gmb_1_is_on                                     0
gmb_2_is_on                                     1
reservoir_level_liters                   387000.0
time_passed_seconds                         150.0
liters_should_have_entered                10095.0
liters_entered                             3400.0
liters_out                                 6695.0
flow_out_(l/s)                              44.63
year                                         2023
month                                           3
week_of_year                                   11
day_of_week                                     4
day                                            17
hour                                           12
second                                         58
