# Para um determinado dia, horário e nível do reservatório, em quanto tempo ele se esvaziará caso falte energia?

In [1]:
from pathlib import Path
from xgboost import callback
import pandas as pd 
from datetime import timedelta
import pickle

In [69]:
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', None)

In [3]:
df_path = Path("../data/curated_data/water_consumption_curated_2.parquet")
df = pd.read_parquet(df_path)
df.head()

Unnamed: 0,timestamp,flow_in_(l/s),reservoir_level_(%),pressure_(mca),gmb_1_is_on,gmb_2_is_on,reservoir_level_liters,time_passed_seconds,liters_entered,flow_out_(l/s),year,month,week_of_year,day_of_week,day,hour,minute,second
1,2023-03-17 12:28:56,66.05,35.86,38.2,0,1,358600.0,3710.0,60000.0,49.877493,2023,3,11,4,17,12,28,56
2,2023-03-17 12:31:26,65.64,36.16,38.06,0,1,361600.0,150.0,3000.0,45.64,2023,3,11,4,17,12,31,26
3,2023-03-17 12:33:56,65.64,36.5,38.03,0,1,365000.0,150.0,3400.0,42.973333,2023,3,11,4,17,12,33,56
4,2023-03-17 12:36:26,65.64,36.8,38.17,0,1,368000.0,150.0,3000.0,45.64,2023,3,11,4,17,12,36,26
5,2023-03-17 12:38:56,65.24,36.8,38.17,0,1,368000.0,150.0,0.0,65.24,2023,3,11,4,17,12,38,56


In [4]:
df["time_passed_seconds"].mode()

0    150.0
Name: time_passed_seconds, dtype: float64

In [5]:
class LearningRateDecay(callback.TrainingCallback):
    def __init__(self, initial_lr=0.01, decay_rate=0.1, decay_steps=1000):
        self.initial_lr = initial_lr
        self.decay_rate = decay_rate
        self.decay_steps = decay_steps

    def after_iteration(self, model, epoch, evals_log):
        new_lr = self.initial_lr * self.decay_rate ** (epoch / self.decay_steps)
        model.set_param('learning_rate', new_lr)
        return False

In [8]:
# Load the forecasting model from a pickle file
with open('../models/xgb_flow_out_forecast.pkl', 'rb') as model_file:
    forecasting_model = pickle.load(model_file)
    
def seconds_to_hms(seconds):
    hours = seconds // 3600
    minutes = (seconds % 3600) // 60
    seconds = seconds % 60
    return hours, minutes, seconds

def update_time_related_columns(df):
    df['year'] = df['timestamp'].dt.year
    df['month'] = df['timestamp'].dt.month
    df['week_of_year'] = df['timestamp'].dt.isocalendar().week
    df['day_of_week'] = df['timestamp'].dt.dayofweek
    df['day'] = df['timestamp'].dt.day
    df['hour'] = df['timestamp'].dt.hour
    df['minute'] = df['timestamp'].dt.minute
    df['second'] = df['timestamp'].dt.second
    return df

In [119]:
def simulate_emptying(row, model, time_step=150, num_steps=100):
    time_elapsed = 0
    reservoir_level = row['reservoir_level_liters']
    
    while reservoir_level > 0:
        # Create a DataFrame with repeated rows and updated timestamps
        simulation_df = pd.DataFrame([row] * num_steps)
        last_timestamp = row['timestamp']
        simulation_df['timestamp'] = [last_timestamp + timedelta(seconds=i * time_step) for i in range(num_steps)]
        
        # Update time-related columns
        simulation_df = update_time_related_columns(simulation_df)
        
        # Prepare input features for prediction
        X = simulation_df[[
            "flow_in_(l/s)", "reservoir_level_(%)", "pressure_(mca)", "gmb_1_is_on", 
            "gmb_2_is_on", "reservoir_level_liters", "time_passed_seconds", "liters_entered", 
            "year", "month", "week_of_year", "day_of_week", "day", "hour", "second"
        ]]
        
        # Predict flow_out_forecast
        simulation_df['flow_out_forecast'] = model.predict(X)
        
        # Simulate the reservoir emptying
        for _, sim_row in simulation_df.iterrows():
            if reservoir_level <= 0:
                return seconds_to_hms(time_elapsed)
            liters_out = sim_row['flow_out_forecast'] * time_step
            reservoir_level -= liters_out
            time_elapsed += time_step
        
        # Update row for the next iteration
        row = simulation_df.iloc[-1].copy()
        row['reservoir_level_liters'] = reservoir_level
    
    return seconds_to_hms(time_elapsed)

hours, minutes, seconds = simulate_emptying(df.iloc[44522], forecasting_model)
print(f'Time to empty the reservoir: {hours} hours, {minutes} minutes, {seconds} seconds')

Time to empty the reservoir: 162 hours, 20 minutes, 0 seconds


In [120]:
# generate 100 random numbers between 0 and 100000
import random
random.seed(42)
random_numbers = random.sample(range(0, 100000), 100)
for i in random_numbers:
    hours, minutes, seconds = simulate_emptying(df.iloc[i], forecasting_model)
    print(f'Time to empty the reservoir: {hours} hours, {minutes} minutes, {seconds} seconds')

Time to empty the reservoir: 1 hours, 35 minutes, 0 seconds
Time to empty the reservoir: 4 hours, 45 minutes, 0 seconds
Time to empty the reservoir: 4 hours, 55 minutes, 0 seconds
Time to empty the reservoir: 2 hours, 17 minutes, 30 seconds
Time to empty the reservoir: 6 hours, 47 minutes, 30 seconds
Time to empty the reservoir: 1 hours, 52 minutes, 30 seconds
Time to empty the reservoir: 3 hours, 35 minutes, 0 seconds
Time to empty the reservoir: 4 hours, 50 minutes, 0 seconds
Time to empty the reservoir: 10 hours, 17 minutes, 30 seconds
Time to empty the reservoir: 66 hours, 25 minutes, 0 seconds
Time to empty the reservoir: 6 hours, 35 minutes, 0 seconds
Time to empty the reservoir: 4 hours, 12 minutes, 30 seconds
Time to empty the reservoir: 10 hours, 40 minutes, 0 seconds
Time to empty the reservoir: 3 hours, 45 minutes, 0 seconds
Time to empty the reservoir: 39 hours, 15 minutes, 0 seconds
Time to empty the reservoir: 3 hours, 42 minutes, 30 seconds
Time to empty the reservoir: 3