In [14]:
import os

import altair as alt
import numpy as np
import pandas as pd
from dotenv import load_dotenv



In [15]:
# Load variables from a local .env file (if present) into the environment,
# then read the directory that holds the notebook's input data.
load_dotenv()
DATA_PATH = os.getenv("DATA_PATH")

# Fail here with a clear message rather than later with a confusing
# "None/inference.parquet" path when the variable is missing or empty.
if not DATA_PATH:
    raise RuntimeError(
        "DATA_PATH is not set; define it in the environment or in a .env file."
    )


In [16]:
# Load the inference results (actual `value` and `forecasted_value` per
# `period` and `subba` region).
df = pd.read_parquet(f"{DATA_PATH}/inference.parquet")

# Prediction interval: forecast +/- one standard deviation of the residuals
# observed for the same region.
# - `residuals` is NaN wherever the forecast or the actual value is missing.
# - `transform('std')` ignores those NaNs and broadcasts the per-region
#   standard deviation back onto every row of that region.
# Later keyword arguments of `assign` may refer to columns created by earlier
# ones, so the four columns are derived in a single non-mutating step.
df = df.assign(
    residuals=lambda d: d['forecasted_value'] - d['value'],
    std_dev=lambda d: d.groupby('subba')['residuals'].transform('std'),
    upper_bound=lambda d: d['forecasted_value'] + d['std_dev'],
    lower_bound=lambda d: d['forecasted_value'] - d['std_dev'],
)

In [17]:
# Inspect the frame, including the derived residual and interval columns.
df

Unnamed: 0,period,subba,forecasted_value,subba-name,parent,parent-name,value,value-units,residuals,std_dev,upper_bound,lower_bound
0,2024-10-14 07:00:00+00:00,PGAE,,Pacific Gas and Electric,CISO,California Independent System Operator,11242.0,megawatthours,,447.608692,,
1,2024-10-14 08:00:00+00:00,PGAE,,Pacific Gas and Electric,CISO,California Independent System Operator,10567.0,megawatthours,,447.608692,,
2,2024-10-14 09:00:00+00:00,PGAE,,Pacific Gas and Electric,CISO,California Independent System Operator,10436.0,megawatthours,,447.608692,,
3,2024-10-14 10:00:00+00:00,PGAE,,Pacific Gas and Electric,CISO,California Independent System Operator,10255.0,megawatthours,,447.608692,,
4,2024-10-14 11:00:00+00:00,PGAE,,Pacific Gas and Electric,CISO,California Independent System Operator,9973.0,megawatthours,,447.608692,,
...,...,...,...,...,...,...,...,...,...,...,...,...
575,2024-10-20 03:00:00+00:00,VEA,55.0,,,,,,,32.760375,87.760375,22.239625
576,2024-10-20 04:00:00+00:00,VEA,53.0,,,,,,,32.760375,85.760375,20.239625
577,2024-10-20 05:00:00+00:00,VEA,48.0,,,,,,,32.760375,80.760375,15.239625
578,2024-10-20 06:00:00+00:00,VEA,43.0,,,,,,,32.760375,75.760375,10.239625


In [93]:
# Fixed colour per utility so a region keeps its colour whichever one is shown.
region_colors = {
    'PGAE': '#1f77b4',
    'SCE': '#ff7f0e',
    'SDGE': '#2ca02c',
    'VEA': '#d62728',
}
color_scheme = alt.Scale(
    domain=list(region_colors.keys()),
    range=list(region_colors.values()),
)

# Regions present in the data, in order of first appearance.
region_options = df['subba'].unique().tolist()

# Dropdown-bound parameter holding the currently selected region;
# it starts on the first region found in the data.
region_param = alt.param(
    name='region',
    value=region_options[0],
    bind=alt.binding_select(options=region_options, name="Select Region"),
)

# Base chart: shared time axis, per-region colour (legend hidden), and the
# filter that keeps only rows of the region chosen in the dropdown.
# Every data layer below derives from it.
base = alt.Chart(df).encode(
    x=alt.X('period:T', axis=alt.Axis(title='Date', labelAngle=-45)),
    color=alt.Color('subba:N', scale=color_scheme, legend=None)
).transform_filter(
    alt.FieldEqualPredicate(field='subba', equal=region_param)
)

# Actual demand: solid line.
# The axis is labelled in MWh because the data reports `value-units`
# as megawatthours.
demand = base.mark_line(strokeWidth=3).encode(
    y=alt.Y('value:Q', axis=alt.Axis(title='Electricity Demand (MWh)'))
)

# Forecasted demand: dashed line to distinguish it from the actuals.
forecast = base.mark_line(strokeWidth=3, strokeDash=[5,5]).encode(
    y='forecasted_value:Q'
)

# Prediction interval: translucent band between the lower and upper bounds.
interval = base.mark_area(opacity=0.2).encode(
    y='lower_bound:Q',
    y2='upper_bound:Q'
)

# Hover selection keyed on the timestamp nearest to the pointer.
# `empty=False` is the Altair 5 boolean form of the deprecated `empty='none'`:
# while nothing is hovered, filters using this selection match no rows.
hover = alt.selection_point(
    fields=['period'],
    nearest=True,
    on='mouseover',
    empty=False
)

# Invisible points that own the hover selection.
# A nearest-point selection needs a discrete mark (Vega-Lite does not support
# `nearest` on line/area marks), so it is attached to a transparent point
# layer instead of to the layered chart as a whole.
selectors = base.mark_point().encode(
    opacity=alt.value(0)
).add_params(hover)

# Vertical rule shown only at the hovered timestamp.
hover_line = alt.Chart(df).mark_rule(color='gray', strokeDash=[5, 5]).encode(
    x='period:T'
).transform_filter(hover)

# Combine the layers. The selector layer goes last (on top) so the whole plot
# area responds to the pointer; the tooltip is shared by every layer.
main_chart = (interval + demand + forecast + hover_line + selectors).encode(
    tooltip=[
        alt.Tooltip('period:T', title='Date'),
        alt.Tooltip('value:Q', title='Actual Demand', format='.2f'),
        alt.Tooltip('forecasted_value:Q', title='Forecasted Demand', format='.2f')
    ]
).properties(
    width=800,
    height=400,
    title=alt.TitleParams(
        ['Short Term Forecast of Electricity Demand in California',
         'Hourly Demand Forecast for Different Regions'],
        subtitle=[
            'This dashboard shows the latest data on electricity demand for the main 4 primary electric utility companies in California:',
            '• Pacific Gas and Electric (PGAE)',
            '• Southern California Edison (SCE)',
            '• San Diego Gas and Electric (SDGE)',
            '• Valley Electric Association (VEA)'
        ],
        anchor='start',
        fontSize=24,
        subtitleFontSize=14,
        offset=20
    )
).add_params(
    # Region dropdown defined with the colour scale above.
    region_param
)

# Footer: a data-less chart that renders a single left-aligned attribution
# line, sized to match the main chart's width.
footer = (
    alt.Chart()
    .properties(width=800, height=30)
    .mark_text(align='left', baseline='bottom', fontSize=10, dx=5, dy=-5)
    .encode(
        text=alt.value('Data Source: U.S. Energy Information Administration - EIA - Independent Statistics and Analysis')
    )
)

# Stack the dashboard above its footer, keep colour and y scales separate per
# panel, then apply the shared background and text/axis colours.
final_chart = (
    alt.vconcat(main_chart, footer, spacing=10)
    .resolve_scale(color='independent', y='independent')
    .configure(background='#f9f9f9')
    .configure_axis(labelColor='#333', titleColor='#333')
    .configure_text(color='#333')
)

final_chart