In [1]:
import plotly.graph_objects as go
import pandas as pd

In [2]:
import pickle
from utils import non_weather_training_columns_list, with_weather_training_columns_list

In [7]:
def forecast_next_24_hours_output_flow_rate(year, month, day, hour, save_df=True):
    
    water_consumption_silver = pd.read_parquet("../data/silver/water_consumption_silver.parquet")
    original_input_df = pd.read_parquet("../data/silver/training_dataset.parquet")
    timestamp = pd.Timestamp(year=year, month=month, day=day, hour=hour)
    input_df = original_input_df[original_input_df["timestamp"] == timestamp]
    
    X = input_df[non_weather_training_columns_list]
    X_weather = input_df[with_weather_training_columns_list]
    
    predictions = []
    for i in range(1, 25):
        new_prediction = {}
        next_timestamp = timestamp + pd.Timedelta(hours=i)
        model = pickle.load(open(f"../models/xgb_{i}h.pkl", "rb"))
        new_prediction['timestamp'] = next_timestamp
        new_prediction['forecasted_output_flow_rate'] = round(float(model.predict(X)[0]), 2)
        predictions.append(new_prediction)


    weather_predictions = []
    for i in range(1, 25):
        new_prediction = {}
        next_timestamp = timestamp + pd.Timedelta(hours=i)
        model = pickle.load(open(f"../models/xgb_with_weather_{i}h.pkl", "rb"))
        new_prediction['timestamp'] = next_timestamp
        new_prediction['weather_forecasted_output_flow_rate'] = round(float(model.predict(X_weather)[0]), 2)
        weather_predictions.append(new_prediction)
        
    predictions = pd.DataFrame(predictions)
    weather_predictions = pd.DataFrame(weather_predictions)
    merged_df = pd.merge(predictions, weather_predictions, on='timestamp')
    
    last_timestamp = merged_df.timestamp.iloc[0]
    first_timestamp = last_timestamp - pd.Timedelta(hours=72)
    timestamps = pd.date_range(start=first_timestamp, end=last_timestamp-pd.Timedelta(hours=1), freq='h')
    water_consumption_silver = water_consumption_silver[water_consumption_silver.timestamp.isin(timestamps)]
    water_consumption_silver = water_consumption_silver[['timestamp', 'output_flow_rate']].rename(columns={'output_flow_rate': 'forecasted_output_flow_rate'})
    water_consumption_silver['weather_forecasted_output_flow_rate'] = water_consumption_silver['forecasted_output_flow_rate']
    water_consumption_silver['forecasted'] = False
    merged_df['forecasted'] = True
    final_df = pd.concat([water_consumption_silver, merged_df], axis=0).reset_index(drop=True)
    
    if not save_df:
        return final_df
    final_df.to_parquet("../data/gold/question_4_and_7_answer_test.parquet")


def simulate_empyting_reservoir(year, month, day, hour, return_df=True):
    original_input_df = pd.read_parquet("../data/silver/water_consumption_silver.parquet")
    input_df = original_input_df[original_input_df["timestamp"] == pd.Timestamp(year=year, month=month, day=day, hour=hour)]
    start_index = input_df.index.values.tolist()[0]
    yesterday_df = original_input_df.iloc[start_index-24:start_index, :].copy()
    forecast_df = forecast_next_24_hours_output_flow_rate(year, month, day, hour, save_df=False).rename(columns={'forecasted_output_flow_rate': 'output_flow_rate'})
    concated_df = pd.concat([input_df[['timestamp', 'reservoir_level_percentage', 'output_flow_rate']], forecast_df[['timestamp', 'output_flow_rate']]], axis=0)
    concated_df['total_liters_out'] = concated_df['output_flow_rate'] * 3600
    concated_df['percentage_out'] = concated_df['total_liters_out'] / 10000

    while concated_df['reservoir_level_percentage'].isnull().any():
        concated_df['reservoir_level_percentage'] = concated_df['reservoir_level_percentage'].fillna(concated_df['reservoir_level_percentage'].shift(1) - concated_df['percentage_out'])
        
    concated_df = concated_df.reset_index(drop=True)
    hours_until_empyting = concated_df[concated_df.reservoir_level_percentage < 0].index.tolist()[0]
    concated_df = concated_df.iloc[0:hours_until_empyting+1, :3]
    concated_df.loc[:, 'simulation'] = True
    yesterday_df.loc[:, 'simulation'] = False
    
    return concated_df, yesterday_df
    new_concated_df = pd.concat([yesterday_df[['timestamp', 'reservoir_level_percentage', 'output_flow_rate', 'simulation']], concated_df], axis=0).reset_index(drop=True)
    if return_df: 
        return new_concated_df
    concated_df.to_parquet("../data/gold/question_5_answer.parquet")
    return hours_until_empyting

In [8]:
concated, yesterday = simulate_empyting_reservoir(2023, 11, 13, 8, return_df=True)

In [10]:
yesterday.head()

Unnamed: 0,timestamp,hour,day_of_week,week_of_year,year,input_flow_rate,reservoir_level_percentage,pressure,output_flow_rate,air_temp_c,total_precip_mm,relative_humidity_percentage,pump_1,pump_2,simulation
5756,2023-11-12 08:00:00,15,5,45,2023,0.0,63.4,30.45,0.37,23.3,0.0,86.0,0.0,0.0,False
5757,2023-11-12 09:00:00,15,5,45,2023,0.0,63.4,30.45,0.37,23.3,0.0,86.0,0.0,0.0,False
5758,2023-11-12 10:00:00,15,5,45,2023,0.0,63.4,30.45,0.37,23.3,0.0,86.0,0.0,0.0,False
5759,2023-11-12 11:00:00,15,5,45,2023,0.0,63.4,30.45,0.37,23.3,0.0,86.0,0.0,0.0,False
5760,2023-11-12 12:00:00,15,5,45,2023,0.0,63.4,30.45,0.37,23.3,0.0,86.0,0.0,0.0,False


In [9]:
concated.head()

Unnamed: 0,timestamp,reservoir_level_percentage,output_flow_rate,simulation
0,2023-11-13 08:00:00,44.56,0.0,True
1,2023-11-10 09:00:00,18.0316,73.69,True
2,2023-11-10 10:00:00,-8.4968,73.69,True


In [5]:
def create_question_5_plot(input_df):
    df = input_df.copy()
    real_data = df[df.simulation==False]
    simulation_data = df[df.simulation==True]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=real_data.timestamp, y=real_data.reservoir_level_percentage, mode='lines+markers', name='Nivel do Reservatorio Real', line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=simulation_data.timestamp, y=simulation_data.reservoir_level_percentage, mode='lines+markers', name='Nivel do Reservatorio Simulado', line=dict(color='red')))
    fig.update_layout(title={'text': 'Simulacao de Esvaziamento do Reservatorio em Queda de Energia', 'x': 0.5, 'xanchor': 'center'},xaxis_title='Horario', yaxis_title='Porcentagem do Reservatorio',legend_title='Type')
    return fig

In [6]:
fig = create_question_5_plot(df)
fig.show()