In [1]:
from utils import non_weather_training_columns_list, with_weather_training_columns_list
import pandas as pd
import pickle 

# Never truncate columns when a DataFrame is displayed (None = no column limit).
pd.set_option('display.max_columns', None)

In [2]:
def forecast_next_24_hours_output_flow_rate(year, month, day, hour, save_df=True, models_dir="../models"):
    """Forecast the output flow rate for each of the 24 hours after a timestamp.

    Reads ``../data/silver/training_dataset.parquet`` and keeps the row(s) whose
    ``timestamp`` equals the requested hour. Two families of per-horizon models
    are then applied, for horizons h = 1..24:

    * ``{models_dir}/xgb_{h}h.pkl`` on the columns named in
      ``non_weather_training_columns_list``;
    * ``{models_dir}/xgb_with_weather_{h}h.pkl`` on the columns named in
      ``with_weather_training_columns_list``.

    Only the prediction for the first matching row is used, rounded to two
    decimals.

    Args:
        year, month, day, hour: components of the reference timestamp.
        save_df: when True (default) the result is written to
            ``../data/gold/question_4_and_7_gold.parquet`` and nothing is
            returned; when False the result is returned and nothing is written.
        models_dir: directory holding the pickled models. The original author
            left a TODO saying the hard-coded relative path needs to change
            because it will fail (presumably when the working directory is not
            the notebook folder); callers can now override it.

    Returns:
        None when ``save_df`` is True; otherwise a DataFrame with columns
        ``timestamp``, ``forecasted_output_flow_rate`` and
        ``weather_forecasted_output_flow_rate`` (one row per forecast hour).

    Raises:
        ValueError: if the dataset has no row for the requested timestamp.
    """
    input_df = pd.read_parquet("../data/silver/training_dataset.parquet")
    timestamp = pd.Timestamp(year=year, month=month, day=day, hour=hour)
    input_df = input_df[input_df["timestamp"] == timestamp]
    # Fail early and clearly instead of crashing inside model.predict(...)[0].
    if input_df.empty:
        raise ValueError(f"No row with timestamp {timestamp} in the training dataset")

    predictions = _predict_each_horizon(
        input_df[non_weather_training_columns_list],
        timestamp,
        models_dir,
        "xgb",
        'forecasted_output_flow_rate',
    )
    weather_predictions = _predict_each_horizon(
        input_df[with_weather_training_columns_list],
        timestamp,
        models_dir,
        "xgb_with_weather",
        'weather_forecasted_output_flow_rate',
    )
    merged_df = pd.merge(predictions, weather_predictions, on='timestamp')

    if not save_df:
        return merged_df
    merged_df.to_parquet("../data/gold/question_4_and_7_gold.parquet")


def _predict_each_horizon(features, timestamp, models_dir, model_prefix, value_column):
    """Return one forecast row per horizon (1h..24h) from the per-horizon pickled models."""
    rows = []
    for horizon in range(1, 25):
        # SECURITY: pickle.load can execute arbitrary code; only point
        # models_dir at model files from a trusted source.
        # The context manager closes each model file (the handles used to leak).
        with open(f"{models_dir}/{model_prefix}_{horizon}h.pkl", "rb") as model_file:
            model = pickle.load(model_file)
        rows.append({
            'timestamp': timestamp + pd.Timedelta(hours=horizon),
            # predict() returns one value per input row; only the first is kept.
            value_column: round(float(model.predict(features)[0]), 2),
        })
    return pd.DataFrame(rows)

In [3]:
def simulate_empyting_reservoir(year, month, day, hour):
    """Early draft of the emptying simulation: gather its inputs and stop.

    Loads the water-consumption table, selects the row(s) for the requested
    hour and requests the 24-hour forecast, but does nothing with either
    result. There is no return statement, so the call evaluates to None.
    A later cell redefines this function with the full simulation.
    """
    consumption_df = pd.read_parquet("../data/silver/water_consumption_silver.parquet")
    start_timestamp = pd.Timestamp(year=year, month=month, day=day, hour=hour)
    is_start_row = consumption_df["timestamp"] == start_timestamp
    # Both results are computed and then discarded, exactly like the draft did.
    consumption_df[is_start_row]
    forecast_next_24_hours_output_flow_rate(year, month, day, hour, save_df=False)

In [4]:
# Try the draft from the previous cell for 2023-08-15 08:00. That draft has no
# return statement, so `out` ends up as None.
out = simulate_empyting_reservoir(2023, 8, 15, 8)

In [5]:
# Inspect the draft's result (Jupyter renders nothing when the value is None).
out

In [14]:
# Exploratory, step-by-step version of the emptying simulation for
# 2023-08-15 08:00 (the same logic is wrapped into a function in a later cell).
original_input_df = pd.read_parquet("../data/silver/water_consumption_silver.parquet")
start_timestamp = pd.Timestamp(year=2023, month=8, day=15, hour=8)
is_start = original_input_df["timestamp"] == start_timestamp
if not is_start.any():
    raise ValueError(f"No row with timestamp {start_timestamp} in the water consumption dataset")
input_df = original_input_df[is_start]
# Position (not index label) of the first matching row, so the iloc slice
# below is right whatever index the parquet file was saved with.
start_index = int(is_start.to_numpy().argmax())
# Previous 24 rows; clamp at 0 because a negative start would make iloc count
# from the end of the frame and return the wrong rows.
yesterday_df = original_input_df.iloc[max(start_index - 24, 0):start_index, :].copy()
forecast_df = forecast_next_24_hours_output_flow_rate(2023, 8, 15, 8, save_df=False).rename(columns={'forecasted_output_flow_rate': 'output_flow_rate'})
# Row 0 is the observed starting hour; the following rows are the forecast
# hours, whose reservoir level is still unknown (NaN).
concated_df = pd.concat(
    [input_df[['timestamp', 'reservoir_level_percentage', 'output_flow_rate']], forecast_df[['timestamp', 'output_flow_rate']]],
    axis=0,
    ignore_index=True,
)
# NOTE(review): 3600 presumably converts a per-second flow rate into liters per
# hour, and 10000 presumably is the number of liters in one percentage point of
# the reservoir — confirm both with the data owner.
concated_df['total_liters_out'] = concated_df['output_flow_rate'] * 3600
concated_df['percentage_out'] = concated_df['total_liters_out'] / 10000

# Propagate the level hour by hour: level[k] = level[k-1] - percentage_out[k].
# A single forward pass replaces the old `while isnull().any()` loop, which
# never terminated when the starting level or a flow rate was missing.
levels = concated_df['reservoir_level_percentage'].tolist()
hourly_drops = concated_df['percentage_out'].tolist()
for row in range(1, len(levels)):
    if pd.isna(levels[row]):
        levels[row] = levels[row - 1] - hourly_drops[row]
concated_df['reservoir_level_percentage'] = levels
if concated_df['reservoir_level_percentage'].isnull().any():
    raise ValueError("Cannot simulate the reservoir level: the starting level or a flow rate is missing")

# First row whose level is below zero; its position equals the number of hours
# after the start. None when the reservoir does not empty within the forecast.
emptied_rows = concated_df.index[concated_df['reservoir_level_percentage'] < 0]
hours_until_empyting = int(emptied_rows[0]) if len(emptied_rows) else None
last_row = len(concated_df) if hours_until_empyting is None else hours_until_empyting + 1
# Keep timestamp / level / flow rate only; copy so the flag below is written
# to an independent frame rather than to a slice.
concated_df = concated_df.iloc[0:last_row, :3].copy()
concated_df['simulation'] = True
yesterday_df['simulation'] = False
concated_df = pd.concat([yesterday_df[['timestamp', 'reservoir_level_percentage', 'output_flow_rate', 'simulation']], concated_df], axis=0).reset_index(drop=True)

In [16]:
def simulate_empyting_reservoir(year, month, day, hour):
    """Simulate the reservoir draining from a given hour using the 24-hour forecast.

    Starting from the observed reservoir level at the requested hour, the
    forecasted output flow rate of each following hour is subtracted until the
    level drops below zero. The previous 24 observed rows (``simulation`` =
    False) followed by the simulated rows (``simulation`` = True) are written
    to ``../data/gold/question_5_answer.parquet``.

    Args:
        year, month, day, hour: components of the starting timestamp.

    Returns:
        The number of hours after the start at which the simulated level first
        goes below zero, or None when it stays non-negative for the whole
        forecast horizon (in that case every forecast hour is saved).

    Raises:
        ValueError: if the dataset has no row for the requested timestamp, or
            if the starting level or a flow rate is missing so the level
            cannot be propagated.
    """
    original_input_df = pd.read_parquet("../data/silver/water_consumption_silver.parquet")
    start_timestamp = pd.Timestamp(year=year, month=month, day=day, hour=hour)
    is_start = original_input_df["timestamp"] == start_timestamp
    if not is_start.any():
        raise ValueError(f"No row with timestamp {start_timestamp} in the water consumption dataset")
    input_df = original_input_df[is_start]
    # Position (not index label) of the first matching row, so the iloc slice
    # below is right whatever index the parquet file was saved with.
    start_position = int(is_start.to_numpy().argmax())
    # Previous 24 rows; clamp at 0 because a negative start would make iloc
    # count from the end of the frame and return the wrong rows.
    yesterday_df = original_input_df.iloc[max(start_position - 24, 0):start_position, :].copy()
    forecast_df = forecast_next_24_hours_output_flow_rate(year, month, day, hour, save_df=False).rename(columns={'forecasted_output_flow_rate': 'output_flow_rate'})
    # Row 0 is the observed starting hour; the following rows are the forecast
    # hours, whose reservoir level is still unknown (NaN).
    concated_df = pd.concat(
        [input_df[['timestamp', 'reservoir_level_percentage', 'output_flow_rate']], forecast_df[['timestamp', 'output_flow_rate']]],
        axis=0,
        ignore_index=True,
    )
    # NOTE(review): 3600 presumably converts a per-second flow rate into liters
    # per hour, and 10000 presumably is the number of liters in one percentage
    # point of the reservoir — confirm both with the data owner.
    concated_df['total_liters_out'] = concated_df['output_flow_rate'] * 3600
    concated_df['percentage_out'] = concated_df['total_liters_out'] / 10000

    # Propagate the level hour by hour: level[k] = level[k-1] - percentage_out[k].
    # A single forward pass replaces the old `while isnull().any()` loop, which
    # never terminated when the starting level or a flow rate was missing.
    levels = concated_df['reservoir_level_percentage'].tolist()
    hourly_drops = concated_df['percentage_out'].tolist()
    for row in range(1, len(levels)):
        if pd.isna(levels[row]):
            levels[row] = levels[row - 1] - hourly_drops[row]
    concated_df['reservoir_level_percentage'] = levels
    if concated_df['reservoir_level_percentage'].isnull().any():
        raise ValueError("Cannot simulate the reservoir level: the starting level or a flow rate is missing")

    # First row whose level is below zero; its position equals the number of
    # hours after the start. None when the reservoir outlasts the forecast.
    emptied_rows = concated_df.index[concated_df['reservoir_level_percentage'] < 0]
    hours_until_empyting = int(emptied_rows[0]) if len(emptied_rows) else None
    last_row = len(concated_df) if hours_until_empyting is None else hours_until_empyting + 1
    # Keep timestamp / level / flow rate only; copy so the flag below is
    # written to an independent frame rather than to a slice.
    concated_df = concated_df.iloc[0:last_row, :3].copy()
    concated_df['simulation'] = True
    yesterday_df['simulation'] = False
    concated_df = pd.concat([yesterday_df[['timestamp', 'reservoir_level_percentage', 'output_flow_rate', 'simulation']], concated_df], axis=0).reset_index(drop=True)
    concated_df.to_parquet("../data/gold/question_5_answer.parquet")
    return hours_until_empyting

In [20]:
# Run the simulation starting at 2023-11-13 07:00. Besides returning the number
# of hours, the function writes its result table to
# ../data/gold/question_5_answer.parquet.
hours_until_empyting = simulate_empyting_reservoir(2023, 11, 13, 7)
print(f"Hours until the reservoir is empty: {hours_until_empyting} hours")
# Reload the table the call above just saved and display it.
empyting_df = pd.read_parquet("../data/gold/question_5_answer.parquet")
empyting_df

Hours until the reservoir is empty: 7 hours


Unnamed: 0,timestamp,reservoir_level_percentage,output_flow_rate,simulation
0,2023-11-12 07:00:00,63.4,0.37,False
1,2023-11-12 08:00:00,63.4,0.37,False
2,2023-11-12 09:00:00,63.4,0.37,False
3,2023-11-12 10:00:00,63.4,0.37,False
4,2023-11-12 11:00:00,63.4,0.37,False
5,2023-11-12 12:00:00,63.4,0.37,False
6,2023-11-12 13:00:00,63.4,0.37,False
7,2023-11-12 14:00:00,63.4,0.37,False
8,2023-11-12 15:00:00,63.4,0.37,False
9,2023-11-12 16:00:00,63.4,0.37,False
