## Qual a curva típica da saída do reservatório ao longo de 24h durante os dias úteis? E nos finais de semana?

In [1]:
import plotly.express as px
from pathlib import Path
import pandas as pd 

In [2]:
# Location of the curated parquet file, relative to the notebook's working directory.
df_path = Path("../data/curated_data/water_consumption_curated_1.parquet")
# Read the whole file into memory; later cells keep working on this `df`.
df = pd.read_parquet(df_path)
# Preview the first rows to check the columns that were loaded.
df.head()

Unnamed: 0,id,timestamp,flow_in_(l/s),reservoir_level_(%),pressure_(mca),gmb_1_is_on,gmb_2_is_on,reservoir_level_liters,time_passed_seconds,liters_should_have_entered,liters_entered,liters_out,flow_out_(l/s)
0,0,2023-03-17 11:27:06,68.59,29.86,38.2,0,1,298600.0,,,,,
1,1,2023-03-17 12:28:56,66.05,35.86,38.2,0,1,358600.0,3710.0,245045.5,60000.0,185045.5,49.88
2,2,2023-03-17 12:31:26,65.64,36.16,38.06,0,1,361600.0,150.0,9846.0,3000.0,6846.0,45.64
3,3,2023-03-17 12:33:56,65.64,36.5,38.03,0,1,365000.0,150.0,9846.0,3400.0,6446.0,42.97
4,4,2023-03-17 12:36:26,65.64,36.8,38.17,0,1,368000.0,150.0,9846.0,3000.0,6846.0,45.64


In [3]:
# Parse 'timestamp' as datetime, then derive from it the weekday number
# (0 = Monday, 6 = Sunday) and the hour of the day.
# Keyword arguments of assign() are evaluated in order, so the two derived
# columns read the already-converted 'timestamp' column.
df = df.assign(
    timestamp=lambda frame: pd.to_datetime(frame['timestamp']),
    day_of_week=lambda frame: frame['timestamp'].dt.dayofweek,
    hour=lambda frame: frame['timestamp'].dt.hour,
)

df.head()

Unnamed: 0,id,timestamp,flow_in_(l/s),reservoir_level_(%),pressure_(mca),gmb_1_is_on,gmb_2_is_on,reservoir_level_liters,time_passed_seconds,liters_should_have_entered,liters_entered,liters_out,flow_out_(l/s),day_of_week,hour
0,0,2023-03-17 11:27:06,68.59,29.86,38.2,0,1,298600.0,,,,,,4,11
1,1,2023-03-17 12:28:56,66.05,35.86,38.2,0,1,358600.0,3710.0,245045.5,60000.0,185045.5,49.88,4,12
2,2,2023-03-17 12:31:26,65.64,36.16,38.06,0,1,361600.0,150.0,9846.0,3000.0,6846.0,45.64,4,12
3,3,2023-03-17 12:33:56,65.64,36.5,38.03,0,1,365000.0,150.0,9846.0,3400.0,6446.0,42.97,4,12
4,4,2023-03-17 12:36:26,65.64,36.8,38.17,0,1,368000.0,150.0,9846.0,3000.0,6846.0,45.64,4,12


In [4]:
# 'day_of_week' follows pandas' convention (0 = Monday ... 6 = Sunday), so
# values from 5 upwards are Saturday and Sunday.
FIRST_WEEKEND_DAY = 5


def _hourly_mean_flow_out(frame, label):
    """Average 'flow_out_(l/s)' per hour of the day and tag the result.

    Parameters
    ----------
    frame : pandas.DataFrame
        Readings with at least the columns 'hour' and 'flow_out_(l/s)'.
    label : str
        Value written to the 'Tipo' column of every returned row.

    Returns
    -------
    pandas.DataFrame
        One row per distinct hour present in `frame`, with the columns
        'hour', 'flow_out_(l/s)' and 'Tipo'.
    """
    # mean() skips NaN by default, so readings without a computed outflow
    # (such as the very first one) do not affect the hourly averages.
    hourly = frame.groupby('hour')['flow_out_(l/s)'].mean().reset_index()
    hourly['Tipo'] = label
    return hourly


# Split the readings into working days and weekends.
df_weekdays = df[df['day_of_week'] < FIRST_WEEKEND_DAY]
df_weekends = df[df['day_of_week'] >= FIRST_WEEKEND_DAY]

# Hourly mean reservoir outflow for each group, labelled for the plot legend.
flow_out_weekdays = _hourly_mean_flow_out(df_weekdays, 'Dias Úteis')
flow_out_weekends = _hourly_mean_flow_out(df_weekends, 'Finais de Semana')

# Stack both groups into one long-format frame. ignore_index=True gives every
# row a unique label; without it each index label would appear twice (once per
# group), making label-based lookups such as df_combined.loc[0] ambiguous.
df_combined = pd.concat([flow_out_weekdays, flow_out_weekends], ignore_index=True)
df_combined.head()

Unnamed: 0,hour,flow_out_(l/s),Tipo
0,0,27.611543,Dias Úteis
1,1,25.07562,Dias Úteis
2,2,25.063462,Dias Úteis
3,3,23.032023,Dias Úteis
4,4,23.727896,Dias Úteis


In [5]:
# Human-readable axis titles, keyed by the column names used in the plot.
axis_labels = {
    'hour': 'Hora do Dia',
    'flow_out_(l/s)': 'Saída do Reservatório (l/s)',
}

# Line chart of the hourly mean outflow, one line per value of 'Tipo'.
fig = px.line(
    df_combined,
    x='hour',
    y='flow_out_(l/s)',
    color='Tipo',
    labels=axis_labels,
    title='Curva Típica da Saída do Reservatório ao Longo de 24h',
)

# Render the figure in the notebook output.
fig.show()