In [1]:
from pathlib import Path
from typing import Tuple
import pandas as pd

In [2]:
df_path = Path('../data/curated_data/water_consumption_curated_1.parquet')
df = pd.read_parquet(df_path)

In [3]:
def create_date_related_columns(original_df: pd.DataFrame) -> pd.DataFrame:
    df = original_df.copy()
    df["date"] = df["timestamp"].dt.date
    df["hour"] = df["timestamp"].dt.hour
    
    # Identify peak hours (18:00 to 21:00)
    peak_hours = (df["hour"] >= 18) & (df["hour"] <= 21)
    df["is_peak_hour"] = peak_hours
    
    # Drop rows with missing values in 'time_passed_seconds'
    df = df.dropna(subset=["time_passed_seconds"])
    
    # Calculate the duration of gmb_1 and gmb_2 being on
    df['gmb_1_duration'] = df['gmb_1_is_on'] * df['time_passed_seconds']
    df['gmb_2_duration'] = df['gmb_2_is_on'] * df['time_passed_seconds']
    
    return df
    
    
def get_avg_use_per_bomb(original_df: pd.DataFrame) -> Tuple[float, float, float, float]:
    df = original_df.copy()
    
    # Sum water bombs usage time per day and hour
    daily_peak_usage = df[df['is_peak_hour']].groupby('date').agg({'gmb_1_duration': 'sum', 'gmb_2_duration': 'sum'})
    daily_off_peak_usage = df[~df['is_peak_hour']].groupby('date').agg({'gmb_1_duration': 'sum', 'gmb_2_duration': 'sum'})

    # Calculate water bombs average usage time per day and hour
    gmb_1_peak_avg = daily_peak_usage['gmb_1_duration'].mean() / 3600  # convert seconds to hours
    gmb_1_off_peak_avg = daily_off_peak_usage['gmb_1_duration'].mean() / 3600  # convert seconds to hours

    gmb_2_peak_avg = daily_peak_usage['gmb_2_duration'].mean() / 3600  # convert seconds to hours
    gmb_2_off_peak_avg = daily_off_peak_usage['gmb_2_duration'].mean() / 3600  # convert seconds to hours
    
    return gmb_1_peak_avg, gmb_1_off_peak_avg, gmb_2_peak_avg, gmb_2_off_peak_avg

In [4]:
gmb_1_peak_avg, gmb_1_off_peak_avg, gmb_2_peak_avg, gmb_2_off_peak_avg = get_avg_use_per_bomb(
    create_date_related_columns(df)
)

In [5]:
print(f"Tempo médio de uso da bomba 1 em horário de ponta: {gmb_1_peak_avg:.2f} horas")
print(f"Tempo médio de uso da bomba 1 em horário fora de ponta: {gmb_1_off_peak_avg:.2f} horas")
print(f"Tempo médio de uso da bomba 2 em horário de ponta: {gmb_2_peak_avg:.2f} horas")
print(f"Tempo médio de uso da bomba 2 em horário fora de ponta: {gmb_2_off_peak_avg:.2f} horas")

Tempo médio de uso da bomba 1 em horário de ponta: 1.41 horas
Tempo médio de uso da bomba 1 em horário fora de ponta: 8.00 horas
Tempo médio de uso da bomba 2 em horário de ponta: 0.76 horas
Tempo médio de uso da bomba 2 em horário fora de ponta: 4.44 horas
