In [2]:
import pandas as pd

from analysis.jp.flat import prepared_data

def calculate_monthly_averages(df: pd.DataFrame = prepared_data, months=(1, 12, 4, 5)) -> None:
    '''
    Calculates and prints out the average kWh usage for selected calendar months.

    Parameters:
        df (pd.DataFrame): The dataframe containing 'month' and 'kwh' columns.
            'month' is compared against integer month numbers (1 = January).
        months (iterable of int): Month numbers to report, in the order they are printed.
            Defaults to January, December, April, and May.

    Returns:
        None. One line per requested month is printed to stdout. A month with no
        matching rows prints "nan" because the mean of an empty selection is NaN.
    '''

    for month in months:
        # Single .loc lookup (row mask + column) instead of chained indexing
        avg_kwh = df.loc[df['month'] == month, 'kwh'].mean()
        # Parse the month number so pandas can supply the month name
        month_name = pd.to_datetime(f"{month}", format="%m").month_name()
        print(f"Average kWh in {month_name}: {avg_kwh:.2f}")

# Print the monthly averages when this code runs as the main module (not on import).
if __name__ == "__main__":
    calculate_monthly_averages()


Average kWh in January: 0.99
Average kWh in December: 1.01
Average kWh in April: 0.78
Average kWh in May: 0.73


In [3]:
import pandas as pd

from analysis.jp.flat import prepared_data

def calculate_period_average_costs(df: pd.DataFrame = prepared_data):
    '''
    Prints the mean of 'total_cost' for every distinct value found in 'period'.

    Parameters:
        df (pd.DataFrame): The dataframe containing 'period' and 'total_cost' columns.
    '''

    # One mean per period label, indexed by that label
    cost_by_period = df.groupby('period')['total_cost'].mean()

    # Walk the (label, mean) pairs directly rather than re-indexing the Series
    for label, mean_cost in cost_by_period.items():
        print(f"Average cost for {label} period: ${mean_cost:.2f}")

# Print the per-period average costs when this code runs as the main module (not on import).
if __name__ == "__main__":
    calculate_period_average_costs()


Average cost for Mid-peak: 7AM to 5PM, 9PM to 11PM period: $0.16
Average cost for Off-peak: 12AM to 7AM period: $0.10
Average cost for On-peak: 5PM to 9PM period: $0.15


In [4]:
import pandas as pd
from utils.runtime import connect_to_db

def calculate_energy_usage_stats():
    '''
    Calculates and prints out the statistics for total energy usage.

    Statistics include:
        - Lowest and highest monthly kWh usage.
        - Average monthly energy usage before and after September 2022.

    The data is read through `connect_to_db()`; the connection is closed as soon as
    the query result has been materialised, even if the query raises.
    '''

    # Usage rows dated on or before 2023-07-31
    query = """
        SELECT date, kwh
        FROM fct_electric_brew fe
        LEFT JOIN dim_datetimes dd ON fe.dim_datetimes_id = dd.id
        WHERE dd.date <= '2023-07-31'
    """
    electric_brew = connect_to_db()
    try:
        df = electric_brew.query(query).to_df()
    finally:
        # Release the connection the same way calculate_solar_projections_statistics does.
        # NOTE(review): assumes connect_to_db() hands out a connection owned by the caller — confirm.
        electric_brew.close()

    # Total kWh per calendar month (monthly Period index)
    df['month'] = df['date'].dt.to_period('M')
    monthly_usage = df.groupby('month')['kwh'].sum()

    # Lowest and highest monthly kWh usage
    lowest_usage = monthly_usage.min()
    highest_usage = monthly_usage.max()
    print(f"Lowest monthly usage: {lowest_usage} kWh")
    print(f"Highest monthly usage: {highest_usage} kWh")

    # Average usage before and after September 2022; September itself counts as "after"
    solar_start = pd.Period('2022-09')
    is_post_solar = monthly_usage.index >= solar_start
    avg_usage_pre_solar = monthly_usage[~is_post_solar].mean()
    avg_usage_post_solar = monthly_usage[is_post_solar].mean()
    print(f"Average monthly usage before solar power: {avg_usage_pre_solar:.2f} kWh")
    print(f"Average monthly usage after solar power: {avg_usage_post_solar:.2f} kWh")

# Print the energy usage statistics when this code runs as the main module (not on import).
if __name__ == "__main__":
    calculate_energy_usage_stats()


Lowest monthly usage: 6966.835 kWh
Highest monthly usage: 15031.381 kWh
Average monthly usage before solar power: 11003.24 kWh
Average monthly usage after solar power: 12216.07 kWh


In [6]:
import pandas as pd
from utils.runtime import connect_to_db

def calculate_cost_statistics():
    '''
    Calculates and prints out the mean monthly total energy cost, and mean monthly costs by generation type (solar and conventional).

    Rows whose 'total_cost' is exactly 0 are excluded before aggregating. A row is
    labelled 'Solar' when its supplier is 'Ampion'; every other supplier is labelled
    'conventional_supplier'.

    The data is read through `connect_to_db()`; the connection is closed as soon as
    the query result has been materialised, even if the query raises.
    '''

    # Billing rows dated on or before 2023-07-31
    query = """
        SELECT supplier, date, kwh, total_cost
        FROM fct_electric_brew fe
        LEFT JOIN dim_datetimes dd ON fe.dim_datetimes_id = dd.id
        LEFT JOIN dim_bills db ON fe.dim_bills_id = db.id
        WHERE dd.date <= '2023-07-31';
    """
    electric_brew = connect_to_db()
    try:
        df = electric_brew.query(query).to_df()
    finally:
        # Release the connection the same way calculate_solar_projections_statistics does.
        # NOTE(review): assumes connect_to_db() hands out a connection owned by the caller — confirm.
        electric_brew.close()

    # Preparing the dataframe. Each step returns a new frame, so the later column
    # assignment never writes into a filtered slice (avoids SettingWithCopyWarning).
    df = (
        df.assign(month=df['date'].dt.to_period('M'))
          .sort_values('date')
          .loc[lambda frame: frame['total_cost'] != 0]
    )
    df = df.assign(
        energy_type=df['supplier'].apply(lambda x: 'Solar' if x == 'Ampion' else 'conventional_supplier')
    )

    # Mean monthly total energy cost: sum within each month, then average the monthly sums
    mean_monthly_cost = df.groupby('month')['total_cost'].sum().mean()
    print(f"Mean monthly total energy cost: ${mean_monthly_cost:.2f}")

    # Monthly costs by generation type, averaged over the months each type appears in
    monthly_costs_by_type = df.groupby(['energy_type', 'month'])['total_cost'].sum()
    mean_monthly_costs_by_type = monthly_costs_by_type.groupby('energy_type').mean()
    for energy_type, mean_cost in mean_monthly_costs_by_type.items():
        print(f"Mean monthly cost for {energy_type}: ${mean_cost:.2f}")

# Print the cost statistics when this code runs as the main module (not on import).
if __name__ == "__main__":
    calculate_cost_statistics()


Mean monthly total energy cost: $1773.40
Mean monthly cost for Solar: $1461.84
Mean monthly cost for conventional_supplier: $1074.25


In [11]:
import pandas as pd
from utils.runtime import connect_to_db

def calculate_solar_projections_statistics():
    '''
    Calculates and prints out detailed statistics for actual vs. projected energy costs in a hypothetical full solar scenario,
    and identifies months with significant cost differences.

    The projection for a month is that month's solar cost per kWh multiplied by the
    month's total kWh (solar + conventional).

    Statistics include:
        - Average actual cost per month.
        - Average projected cost per month under full solar.
        - Average cost per kWh for solar energy.
        - Standard deviation of the cost per kWh for solar energy.
        - Months with notably high percentage differences.

    Raises:
        KeyError: if the filtered data contains no 'Solar' rows or no
            'conventional_supplier' rows, because the corresponding column is absent.
    '''

    # Billing rows dated from 2022-09-01 through 2023-07-31
    query = """
        SELECT supplier, date, kwh, total_cost
        FROM fct_electric_brew fe
        LEFT JOIN dim_datetimes dd ON fe.dim_datetimes_id = dd.id
        LEFT JOIN dim_bills db ON fe.dim_bills_id = db.id
        WHERE dd.date >= '2022-09-01' AND dd.date <= '2023-07-31';
    """
    electric_brew = connect_to_db()
    try:
        df = electric_brew.query(query).to_df()
    finally:
        # Close even when the query fails; the result is already a DataFrame by this point
        electric_brew.close()

    # Engineering the dataframe. Each step returns a new frame, so the later column
    # assignment never writes into a filtered slice (avoids SettingWithCopyWarning).
    df = (
        df.assign(month=df['date'].dt.to_period('M'))
          .sort_values('date')
          .loc[lambda frame: frame['total_cost'] != 0]
    )
    df = df.assign(
        energy_type=df['supplier'].apply(lambda x: 'Solar' if x == 'Ampion' else 'conventional_supplier')
    )

    # Per-month sums with one column per (metric, energy_type); a missing combination becomes 0
    df_grouped = (
        df.groupby(['month', 'energy_type'], sort=False)
          .agg({'total_cost': 'sum', 'kwh': 'sum'})
          .unstack(fill_value=0)
    )

    # Pull the four month-indexed Series out once instead of adding flat columns
    # to a frame whose columns are a MultiIndex
    solar_cost = df_grouped[('total_cost', 'Solar')]
    solar_kwh = df_grouped[('kwh', 'Solar')]
    conventional_cost = df_grouped[('total_cost', 'conventional_supplier')]
    conventional_kwh = df_grouped[('kwh', 'conventional_supplier')]

    solar_cost_per_kwh = solar_cost / solar_kwh
    total_kwh = solar_kwh + conventional_kwh
    actual_costs = solar_cost + conventional_cost
    projected_costs = solar_cost_per_kwh * total_kwh

    # Calculating statistics
    avg_actual_cost_per_month = actual_costs.mean()
    avg_projected_cost_per_month = projected_costs.mean()
    avg_cost_per_kwh_solar = solar_cost_per_kwh.mean()
    std_dev_cost_per_kwh_solar = solar_cost_per_kwh.std()

    # Percentage difference of projected vs. actual cost for every month
    percent_diff = (projected_costs - actual_costs) / actual_costs * 100

    # Identify significant months: absolute difference strictly above the threshold
    significance_threshold_pct = 10
    significant_months = percent_diff[percent_diff.abs() > significance_threshold_pct]

    print(f"Average actual cost per month: ${avg_actual_cost_per_month:.2f}")
    print(f"Average projected cost per month under full solar: ${avg_projected_cost_per_month:.2f}")
    print(f"Average cost per kWh for solar energy: ${avg_cost_per_kwh_solar:.3f}")
    print(f"Standard deviation of the cost per kWh for solar energy: ${std_dev_cost_per_kwh_solar:.3f}")
    print("Significant months (with percentage difference > 10%):")
    for month, diff in significant_months.items():
        print(f"  - {month}: {diff:.2f}%")

# Print the solar projection statistics when this code runs as the main module (not on import).
if __name__ == "__main__":
    calculate_solar_projections_statistics()


Average actual cost per month: $2362.32
Average projected cost per month under full solar: $2524.36
Average cost per kWh for solar energy: $0.224
Standard deviation of the cost per kWh for solar energy: $0.025
Significant months (with percentage difference > 10%):
  - 2022-12: 50.31%
  - 2023-01: 36.41%
