In [1]:
from xgboost import XGBRegressor
from datetime import timedelta
from xgboost import callback
from typing import Tuple
from pathlib import Path
import pandas as pd
import pickle

In [2]:
# Lift pandas' display truncation so every column and every row is rendered.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [3]:
# Read the curated water-consumption dataset from its parquet file (the path is
# relative, so it resolves against the notebook's working directory) and
# preview the first rows.
df_path = Path('../data/curated_data/water_consumption_curated_2.parquet')
df = pd.read_parquet(df_path)
df.head()

Unnamed: 0,id,timestamp,flow_in_(l/s),reservoir_level_(%),pressure_(mca),gmb_1_is_on,gmb_2_is_on,reservoir_level_liters,time_passed_seconds,liters_should_have_entered,liters_entered,liters_out,flow_out_(l/s),year,month,week_of_year,day_of_week,day,hour,second,flow_out_forecast
1,1,2023-03-17 12:28:56,66.05,35.86,38.2,0,1,358600.0,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,56,36.827347
2,2,2023-03-17 12:31:26,65.64,36.16,38.06,0,1,361600.0,150.0,9846.0,3000.0,6846.0,45.64,2023,3,11,4,17,12,26,45.883347
3,3,2023-03-17 12:33:56,65.64,36.5,38.03,0,1,365000.0,150.0,9846.0,3400.0,6446.0,42.97,2023,3,11,4,17,12,56,42.028744
4,4,2023-03-17 12:36:26,65.64,36.8,38.17,0,1,368000.0,150.0,9846.0,3000.0,6846.0,45.64,2023,3,11,4,17,12,26,45.883347
5,5,2023-03-17 12:38:56,65.24,36.8,38.17,0,1,368000.0,150.0,9786.0,0.0,9786.0,65.24,2023,3,11,4,17,12,56,64.437569


In [4]:
# Load the pickled flow-out forecasting model. The context manager guarantees
# the file handle is closed even if unpickling fails.
# NOTE(review): unpickling can execute arbitrary code -- only load model files
# that come from a trusted source.
with open('../models/xgb_flow_out_forecast_2.pkl', 'rb') as model_file:
    forecasting_model = pickle.load(model_file)

In [5]:
# Load the forecasting model from a pickle file.
# NOTE(review): this reads the same path as the previous cell, so it simply
# rebinds `forecasting_model`; unpickling should only be done on trusted files.
with open('../models/xgb_flow_out_forecast_2.pkl', 'rb') as model_file:
    forecasting_model = pickle.load(model_file)
    
def seconds_to_hms(seconds):
    """Break a duration in seconds into hours, minutes and leftover seconds.

    Returns:
        A ``(hours, minutes, seconds)`` tuple where ``minutes`` and
        ``seconds`` are the remainders after removing whole hours and whole
        minutes respectively.
    """
    whole_hours, within_hour = divmod(seconds, 3600)
    whole_minutes, leftover_seconds = divmod(within_hour, 60)
    return whole_hours, whole_minutes, leftover_seconds

def update_time_related_columns(df):
    """Recompute the calendar/clock columns from the ``timestamp`` column.

    The frame is modified in place (columns are overwritten, or appended when
    missing) and the same object is returned for convenience.
    """
    # (target column, how to derive it from the datetime accessor); the order
    # is the order in which missing columns get appended to the frame.
    derivations = (
        ('year', lambda stamps: stamps.year),
        ('month', lambda stamps: stamps.month),
        ('week_of_year', lambda stamps: stamps.isocalendar().week),
        ('day_of_week', lambda stamps: stamps.dayofweek),
        ('day', lambda stamps: stamps.day),
        ('hour', lambda stamps: stamps.hour),
        ('minute', lambda stamps: stamps.minute),
        ('second', lambda stamps: stamps.second),
    )
    for column_name, derive in derivations:
        df[column_name] = derive(df['timestamp'].dt)
    return df

def forecast_flow_out(df, model):
    """Predict the outflow for every row and store it in ``flow_out_forecast``.

    Args:
        df: Frame containing at least the feature columns listed below.
        model: Object exposing ``predict(X)``; it receives the feature columns
            in exactly the order listed.

    Returns:
        The same ``df`` object, with ``flow_out_forecast`` added or overwritten.
    """
    feature_columns = [
        "flow_in_(l/s)",
        "reservoir_level_(%)",
        "pressure_(mca)",
        "gmb_1_is_on",
        "gmb_2_is_on",
        "reservoir_level_liters",
        "time_passed_seconds",
        "liters_entered",
        "year",
        "month",
        "week_of_year",
        "day_of_week",
        "day",
        "hour",
        "second",
    ]
    features = df[feature_columns]
    predictions = model.predict(features)
    df['flow_out_forecast'] = predictions
    return df

def _build_simulation_batch(start_row, model, time_step, num_steps):
    """Build ``num_steps`` forecasted rows starting at ``start_row``'s timestamp."""
    batch = pd.DataFrame([start_row] * num_steps).reset_index(drop=True)
    first_timestamp = start_row['timestamp']
    batch['timestamp'] = [
        first_timestamp + timedelta(seconds=step * time_step)
        for step in range(num_steps)
    ]
    batch = update_time_related_columns(batch)
    return forecast_flow_out(batch, model)


def simulate_emptying(
    original_df: pd.DataFrame,
    row_index: int,
    model: XGBRegressor,
    time_step: int = 150,
    num_steps: int = 40
) -> Tuple[pd.DataFrame, Tuple[int, int, int]]:
    """Simulate the reservoir draining from one observed row until it is empty.

    Starting from the row at position ``row_index``, the function repeatedly
    builds a batch of ``num_steps`` rows spaced ``time_step`` seconds apart,
    forecasts the outflow of each row with ``model`` and, for every row after
    the first, subtracts ``flow_out_forecast * time_step`` from the running
    ``reservoir_level_liters`` value. Within a batch, only the timestamp and
    the time-derived columns differ between rows at the moment the forecast is
    made; all other features are copied from the batch's starting row.

    Args:
        original_df: Source frame; it is only read, never modified.
        row_index: Positional index (``iloc``) of the starting row.
        model: Fitted regressor passed to ``forecast_flow_out``.
        time_step: Seconds between consecutive simulated rows; must be > 0.
        num_steps: Rows per forecast batch, including the batch's starting
            row; must be >= 2 so that every batch advances the simulation.

    Returns:
        A tuple ``(simulation_df, (hours, minutes, seconds))``. The frame holds
        the starting row followed by one row per simulated step, ending with
        the first row whose level is <= 0. If the starting level is already
        not positive, the frame holds only the starting row and the elapsed
        time is ``(0, 0, 0)``.

    Raises:
        ValueError: If ``time_step`` or ``num_steps`` is too small.
        RuntimeError: If a whole batch fails to lower the level (non-positive
            or NaN forecasts), since the simulation could never terminate.
    """
    if time_step <= 0:
        raise ValueError("time_step must be a positive number of seconds")
    if num_steps < 2:
        raise ValueError("num_steps must be at least 2 so each batch advances in time")

    # Only a single row is needed, and it is never mutated, so the source
    # frame does not have to be copied.
    row = original_df.iloc[row_index]
    reservoir_level = row['reservoir_level_liters']
    print(f"Initial reservoir level: {reservoir_level}")

    # Nothing to drain: return the starting row with the same columns a
    # regular simulation produces, instead of failing on an unbound variable.
    if not reservoir_level > 0:
        return _build_simulation_batch(row, model, time_step, 1), seconds_to_hms(0)

    time_elapsed = 0
    simulations = []

    while True:
        batch = _build_simulation_batch(row, model, time_step, num_steps)
        forecasts = batch['flow_out_forecast'].to_numpy()
        level_at_batch_start = reservoir_level
        last_step = 0

        # Row 0 is the batch's starting state, so draining begins at row 1.
        for step in range(1, num_steps):
            reservoir_level -= forecasts[step] * time_step
            batch.at[step, 'reservoir_level_liters'] = reservoir_level
            time_elapsed += time_step
            last_step = step
            if reservoir_level <= 0:
                break

        # Every batch after the first starts on the previous batch's final
        # row; skip that row so each timestamp appears only once.
        first_kept = 1 if simulations else 0
        simulations.append(batch.iloc[first_kept:last_step + 1])

        if reservoir_level <= 0:
            return pd.concat(simulations, ignore_index=True), seconds_to_hms(time_elapsed)

        # Also catches NaN levels, which compare False against everything.
        if not reservoir_level < level_at_batch_start:
            raise RuntimeError(
                "Reservoir level did not decrease over a full batch of forecasts; "
                "the simulation cannot reach an empty reservoir"
            )

        # The last simulated row seeds the next batch.
        row = batch.iloc[-1]

In [6]:
# Simulate draining the reservoir starting from the first row (position 0) of df.
# NOTE: despite its name, `total_seconds` receives the (hours, minutes, seconds)
# tuple produced by seconds_to_hms, not a raw number of seconds.
simulation_df, total_seconds = simulate_emptying(df, 0, forecasting_model)

Initial reservoir level: 358600.0


In [7]:
# Preview up to the first 100 simulated rows.
simulation_df.head(100)

Unnamed: 0,id,timestamp,flow_in_(l/s),reservoir_level_(%),pressure_(mca),gmb_1_is_on,gmb_2_is_on,reservoir_level_liters,time_passed_seconds,liters_should_have_entered,liters_entered,liters_out,flow_out_(l/s),year,month,week_of_year,day_of_week,day,hour,second,flow_out_forecast,minute
0,1,2023-03-17 12:28:56,66.05,35.86,38.2,0,1,358600.0,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,56,36.827347,28
1,1,2023-03-17 12:31:26,66.05,35.86,38.2,0,1,353075.89798,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,26,36.827347,31
2,1,2023-03-17 12:33:56,66.05,35.86,38.2,0,1,347551.795959,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,56,36.827347,33
3,1,2023-03-17 12:36:26,66.05,35.86,38.2,0,1,342027.693939,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,26,36.827347,36
4,1,2023-03-17 12:38:56,66.05,35.86,38.2,0,1,336503.591919,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,56,36.827347,38
5,1,2023-03-17 12:41:26,66.05,35.86,38.2,0,1,330979.489899,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,26,36.827347,41
6,1,2023-03-17 12:43:56,66.05,35.86,38.2,0,1,325455.387878,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,56,36.827347,43
7,1,2023-03-17 12:46:26,66.05,35.86,38.2,0,1,319931.285858,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,26,36.827347,46
8,1,2023-03-17 12:48:56,66.05,35.86,38.2,0,1,314407.183838,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,56,36.827347,48
9,1,2023-03-17 12:51:26,66.05,35.86,38.2,0,1,308883.081818,3710.0,245045.5,60000.0,185045.5,49.88,2023,3,11,4,17,12,26,36.827347,51


In [8]:
# `total_seconds` is the (hours, minutes, seconds) tuple returned by
# simulate_emptying; unpack it to report how long the simulation ran.
hours, minutes, seconds = total_seconds
print(f"Time elapsed: {hours} hours, {minutes} minutes, {seconds} seconds")

Time elapsed: 2 hours, 42 minutes, 30 seconds
