# About this notebook

This notebook is for test-running the data pipeline and serves as a sandbox for testing new functions that we are adding to the data pipeline.

In [None]:
# import packages
import argparse
import os
import sys

import numpy as np
import pandas as pd
import plotly.express as px

%reload_ext autoreload
%autoreload 2

# Tell python where to look for modules.
sys.path.append("../../hourly-egrid/")

# import local modules
import src.data_cleaning as data_cleaning
import src.load_data as load_data
import src.impute_hourly_profiles as impute_hourly_profiles
import src.eia930 as eia930
import src.output_data as output_data

from src.column_checks import get_dtypes, apply_dtypes

# Run the Pipeline

In [None]:
# Change the working directory to ../src and run the data_pipeline script for 2020.
# Every relative path used by later cells (e.g. "../data/outputs/...") is resolved
# from the directory set here.
%cd ../src
%run data_pipeline --year 2020

In [None]:
# Run the data_pipeline script for 2020 with the `--small SMALL` option.
# NOTE(review): running this cell in addition to the previous one runs the pipeline twice.
%cd ../src
%run data_pipeline --small SMALL --year 2020

# Functions for loading intermediate outputs

In [None]:

# Read the pipeline's intermediate output tables for one data year from csv.
year = 2020
path_prefix = ''

# hourly CEMS tables: parse the UTC timestamp and the report month as dates
cems = pd.read_csv(
    f'../data/outputs/{path_prefix}cems_{year}.csv',
    dtype=get_dtypes(),
    parse_dates=['datetime_utc', 'report_date'],
)
partial_cems_scaled = pd.read_csv(
    f'../data/outputs/{path_prefix}partial_cems_scaled_{year}.csv',
    dtype=get_dtypes(),
    parse_dates=['datetime_utc', 'report_date'],
)

# monthly allocated EIA-923 table: only the report month needs date parsing
eia923_allocated = pd.read_csv(
    f'../data/outputs/{path_prefix}eia923_allocated_{year}.csv',
    dtype=get_dtypes(),
    parse_dates=['report_date'],
)

# static plant attributes, plus a lookup with one primary-fuel row per plant
plant_attributes = pd.read_csv(f"../data/outputs/{path_prefix}plant_static_attributes_{year}.csv")
primary_fuel_table = plant_attributes[["plant_id_eia", "plant_primary_fuel"]].drop_duplicates(
    subset="plant_id_eia"
)

# residual profiles are read with pandas' default dtypes here
residual_profiles = pd.read_csv(f"../data/outputs/{path_prefix}residual_profiles_{year}.csv")

# TODO: Add output metrics back in

In [None]:
# output data quality metrics
# NOTE(review): `validation` is not imported anywhere else in this notebook. The
# import below assumes it lives in the `src` package next to the other pipeline
# modules — confirm the module path.
import src.validation as validation

# The metric functions take the monthly EIA rows that still need an hourly shape.
# That frame is otherwise only built further down the notebook, so build it here
# (same filter) from the `eia923_allocated` table loaded in the previous cell.
monthly_eia_data_to_shape = eia923_allocated[
    (eia923_allocated["hourly_data_source"] == "eia")
    & ~(eia923_allocated["fuel_consumed_mmbtu"].isna())
]

# each metric is computed from the same three inputs and written under its own name
validation_metrics = [
    (validation.co2_source_metric, "co2_measurement_source"),
    (validation.net_generation_method_metric, "net_generation_method"),
    (validation.hourly_profile_source_metric, "hourly_profile_method"),
]
for metric_function, metric_name in validation_metrics:
    output_data.output_to_results(
        metric_function(cems, partial_cems_scaled, monthly_eia_data_to_shape),
        metric_name,
        "validation_metrics/",
        path_prefix,
    )

# Check Negative scaled residuals

In [None]:
# Reload the hourly CEMS tables from csv for the residual check.
year = 2020
path_prefix = ''

cems = pd.read_csv(
    f'../data/outputs/{path_prefix}cems_{year}.csv',
    dtype=get_dtypes(),
    parse_dates=['datetime_utc', 'report_date'],
)
partial_cems_scaled = pd.read_csv(
    f'../data/outputs/{path_prefix}partial_cems_scaled_{year}.csv',
    dtype=get_dtypes(),
    parse_dates=['datetime_utc', 'report_date'],
)

# filter the full CEMS table against the partial (scaled) CEMS table
cems = data_cleaning.filter_unique_cems_data(cems, partial_cems_scaled)


In [None]:
# Load the EIA-930 data for `year` from the downloaded chalendar csv,
# using the eia930 module's loader for the pipeline.
eia930_data = eia930.load_chalendar_for_pipeline(
        "../data/downloads/eia930/chalendar/EBA_adjusted_elec.csv", year=year
    )

In [None]:
# Load the static plant attributes table written by the pipeline for `year`.
plant_attributes = pd.read_csv(f"../data/outputs/{path_prefix}plant_static_attributes_{year}.csv")

In [None]:
# Display the plant attributes table for manual inspection.
plant_attributes

In [None]:
# Display the CEMS table for manual inspection.
cems

In [None]:
# Remove plant-attribute columns (including `_x` / `_y` duplicates) that are present
# when `cems` has already been merged with the plant attributes, so the
# plant_attributes merge later in this section starts from a clean frame.
# errors="ignore" lets the cell also run on a freshly loaded `cems`, where these
# columns do not exist yet; without it, drop() raises a KeyError.
cems = cems.drop(
    columns=[
        "distribution_flag_x",
        "fuel_category_x",
        "fuel_category_eia930_x",
        "timezone_x",
        "plant_primary_fuel",
        "source",
        "ba_code",
        "ba_code_physical",
        "state",
        "distribution_flag_y",
        "fuel_category_y",
        "fuel_category_eia930_y",
        "timezone_y",
    ],
    errors="ignore",
)

In [None]:
# Grouping options (could become command line arguments if needed).
# transmission = True and the physical BA code follow the EIA-930 instructions.
TRANSMISSION = False  # use only transmission-level connections?
BA_CODE = "ba_code"  # ba_code or ba_code_physical?

# Attach the plant attributes to every CEMS row, then aggregate so the columns
# are named the same way as in the 930 data / hourly_profiles.
cems = impute_hourly_profiles.aggregate_for_residual(
    cems.merge(plant_attributes, on="plant_id_eia", how="left"),
    "datetime_utc",
    BA_CODE,
    transmission=TRANSMISSION,
)


In [None]:
# Left-join the aggregated CEMS data onto the EIA-930 data by BA, fuel and hour
combined_data = eia930_data.merge(
    cems, on=["ba_code", "fuel_category_eia930", "datetime_utc"], how="left"
)

# keep only the rows whose local timestamp falls in the current year
in_current_year = combined_data["datetime_local"].apply(lambda ts: ts.year) == year
combined_data = combined_data[in_current_year]

In [None]:
# Rows with no matching CEMS data have NaN net generation after the left merge;
# treat those as zero.
combined_data["net_generation_mwh"] = combined_data["net_generation_mwh"].fillna(0)

In [None]:
# Show hours where the 930 net generation is below 1 MWh but the CEMS net generation is above 1 MWh.
combined_data[(combined_data['net_generation_mwh_930'] < 1) & (combined_data['net_generation_mwh'] > 1)]

In [None]:
# Plot the 930, CEMS and scaled-CEMS series for AECI natural gas over local time.
# NOTE(review): the "cems_scaled" column is only added to combined_data by a later
# cell in this section, so this cell fails on a fresh top-to-bottom run.
px.line(combined_data[(combined_data['ba_code'] == 'AECI') & (combined_data['fuel_category_eia930'] == 'natural_gas')], x='datetime_local', y=['net_generation_mwh_930','net_generation_mwh',"cems_scaled"])

In [None]:
# Pairwise correlations between the numeric columns for AECI natural gas.
# The frame also carries non-numeric columns (e.g. ba_code, fuel_category_eia930),
# which DataFrame.corr() refuses to handle in pandas >= 2.0, so restrict the
# selection to numeric columns before correlating.
combined_data[
    (combined_data['ba_code'] == 'AECI')
    & (combined_data['fuel_category_eia930'] == 'natural_gas')
].select_dtypes(include="number").corr()

In [None]:
# Ratio of 930 net generation to CEMS net generation.
# When the two sources are consistent the ratio should be >= 1.
combined_data["ratio"] = combined_data["net_generation_mwh_930"].div(
    combined_data["net_generation_mwh"]
)

In [None]:
# only keep scaling factors < 1, which means the data needs to be scaled
# (ratios that are zero, negative, NaN or infinite fail the 0 < ratio < 1 test and are dropped)
scaling_factors = combined_data[(combined_data["ratio"] < 1) & (combined_data["ratio"] > 0)]

In [None]:
# Find the scaling factor: the smallest ratio observed for each BA-fuel.
# Groups with a missing key are kept (dropna=False).
ratio_by_ba_fuel = scaling_factors.groupby(
    ["ba_code", "fuel_category_eia930"], dropna=False
)["ratio"]
scaling_factors = ratio_by_ba_fuel.min().rename("scaling_factor").reset_index()

In [None]:


# merge the scaling factor into the combined data
combined_data = combined_data.merge(
    scaling_factors, how="left", on=["ba_code", "fuel_category_eia930"]
)

# for any BA-fuels without a scaling factor, fill with 1 (scale to 100% of the original data)
# Only the scaling_factor column is filled: a frame-wide fillna(1) would also replace
# missing values in every other column (e.g. missing 930 generation) with 1.
combined_data["scaling_factor"] = combined_data["scaling_factor"].fillna(1)

# calculate the scaled cems data
combined_data["cems_scaled"] = (
    combined_data["net_generation_mwh"] * combined_data["scaling_factor"]
)

# calculate the residual
combined_data["profile"] = (
    combined_data["net_generation_mwh_930"] - combined_data["cems_scaled"]
)

# identify the method used to calculate the profile
# if the scaling factor is 1, then the profile was not scaled
combined_data = combined_data.assign(
    profile_method=lambda x: np.where(
        (x.scaling_factor == 1), "residual", "scaled_residual"
    )
)

In [None]:
# Display the combined 930 / CEMS table with the scaled residual profile columns.
combined_data

# Test Hourly Profiles

In [None]:
# load data from csv
# (inputs for the hourly-profile tests: allocated EIA-923 data, plant attributes,
# a one-row-per-plant primary fuel lookup, and the residual profiles)
year = 2020
path_prefix = ''
eia923_allocated = pd.read_csv(f'../data/outputs/{path_prefix}eia923_allocated_{year}.csv', dtype=get_dtypes(), parse_dates=['report_date'])
plant_attributes = pd.read_csv(f"../data/outputs/{path_prefix}plant_static_attributes_{year}.csv")
primary_fuel_table = plant_attributes.drop_duplicates(subset="plant_id_eia")[["plant_id_eia", "plant_primary_fuel"]]
residual_profiles = pd.read_csv(f'../data/outputs/{path_prefix}residual_profiles_{year}.csv', dtype=get_dtypes(), parse_dates=['report_date'])


In [None]:
# Keep the rows whose hourly data source is "eia" and that report a
# (non-missing) fuel consumption value.
monthly_eia_data_to_shape = eia923_allocated[
    eia923_allocated["fuel_consumed_mmbtu"].notna()
    & (eia923_allocated["hourly_data_source"] == "eia")
]

In [None]:
# Replace missing ba_code values with the literal 'RIMS' (mutates plant_attributes in place).
# NOTE(review): the meaning of 'RIMS' is not visible in this notebook — confirm.
plant_attributes['ba_code'] = plant_attributes['ba_code'].fillna('RIMS')

In [None]:

# Build the hourly profiles from the monthly EIA data, the residual profiles
# and the plant attributes for `year`.
hourly_profiles = impute_hourly_profiles.impute_missing_hourly_profiles(
    monthly_eia_data_to_shape, residual_profiles, plant_attributes, year
)


In [None]:
# make sure there is no duplicated data
# (an empty result means each ba_code / fuel_category / datetime_local combination is unique)
hourly_profiles[hourly_profiles.duplicated(subset=["ba_code","fuel_category","datetime_local"])]

In [None]:
# Spot-check one profile: AVRN solar for the January 2020 report month.
hourly_profiles[(hourly_profiles['ba_code'] == 'AVRN') & (hourly_profiles['fuel_category'] == 'solar') & (hourly_profiles['report_date'] == '2020-01-01')]

In [None]:
# make sure there is no na or inf data
# isna() on its own does not flag +/-inf, so map infinities to NaN first;
# an empty result means every value is present and finite
hourly_profiles[
    hourly_profiles.replace([np.inf, -np.inf], np.nan).isna().any(axis=1)
]

In [None]:
# validate hourly profiles
# make sure that no profiles are negative
# (an empty result means the check passes)
hourly_profiles[(hourly_profiles['profile'] < 0)]

In [None]:
# Display the hourly profiles table for manual inspection.
hourly_profiles

In [None]:
# Convert the profiles with the module's convert_profile_to_percent helper.
# Note this overwrites `hourly_profiles`, so re-running the cell converts the data again.
hourly_profiles = impute_hourly_profiles.convert_profile_to_percent(hourly_profiles)

In [None]:
# make sure there is no na or inf data
# isna() on its own does not flag +/-inf, so map infinities to NaN first;
# an empty result means every value is present and finite
hourly_profiles[
    hourly_profiles.replace([np.inf, -np.inf], np.nan).isna().any(axis=1)
]

In [None]:
# Show rows whose profile value is infinite. Both signs are checked: comparing
# against np.inf alone would miss -inf values.
hourly_profiles[hourly_profiles['profile'].isin([np.inf, -np.inf])]

In [None]:
# Attach each plant's ba_code and fuel_category to the monthly EIA rows.
ba_fuel_lookup = plant_attributes[["plant_id_eia", "ba_code", "fuel_category"]]
monthly_eia_data_to_shape_ba = monthly_eia_data_to_shape.merge(
    ba_fuel_lookup, on="plant_id_eia", how="left"
)

In [None]:
# Aggregate EIA data to BA/fuel/month, then assign hourly profile per BA/fuel
# (the aggregation helper is given the plant attributes table as its second argument)
monthly_eia_data_to_shape_agg = impute_hourly_profiles.aggregate_eia_data_to_ba_fuel(
    monthly_eia_data_to_shape, plant_attributes
)

In [None]:
# Spot-check one profile: AEC hydro for the April 2020 report month.
hourly_profiles[(hourly_profiles['ba_code'] == 'AEC') & (hourly_profiles['fuel_category'] == 'hydro') & (hourly_profiles['report_date'] == '2020-4-01')]

In [None]:
# Shape the BA/fuel-aggregated monthly EIA data using the hourly profiles.
shaped_eia_data = impute_hourly_profiles.shape_monthly_eia_data_as_hourly(
        monthly_eia_data_to_shape_agg, hourly_profiles
    )

In [None]:
# Spot-check the shaped data: SWPP solar for the April 2020 report month.
# All three conditions are evaluated on shaped_eia_data itself; taking the
# report_date condition from hourly_profiles builds a mask on a different frame's index.
shaped_eia_data[
    (shaped_eia_data['ba_code'] == 'SWPP')
    & (shaped_eia_data['fuel_category'] == 'solar')
    & (shaped_eia_data['report_date'] == '2020-4-01')
]

# Investigate missing data

In [None]:
# Load the allocated EIA-923 data, plant attributes and residual profiles, then
# attach the plant attributes to every EIA-923 row.
# Note: unlike the earlier loading cells, no dtype mapping is passed to read_csv here.
year = 2020
path_prefix = ''
eia923_allocated = pd.read_csv(f'../data/outputs/{path_prefix}eia923_allocated_{year}.csv', parse_dates=['report_date'])
plant_frame = pd.read_csv(f"../data/outputs/{path_prefix}plant_static_attributes_{year}.csv")
residual_profiles = pd.read_csv(f"../data/outputs/{path_prefix}residual_profiles_{year}.csv")
eia923_allocated = eia923_allocated.merge(plant_frame, how='left', on='plant_id_eia')

In [None]:
# 11. Assign hourly profile to monthly data
print("Assigning hourly profile to monthly EIA-923 data")

# Build a separate dataframe holding only the generators without CEMS data:
# rows sourced from "eia" that report a non-missing fuel consumption value.
monthly_eia_data_to_shape = eia923_allocated[
    eia923_allocated["fuel_consumed_mmbtu"].notna()
    & (eia923_allocated["hourly_data_source"] == "eia")
]

In [None]:
# Show the monthly EIA rows whose energy source code is 'SUN'.
# The frame built in the previous cell is `monthly_eia_data_to_shape`; the name
# `monthly_data_to_shape` is not defined anywhere in the notebook.
monthly_eia_data_to_shape[monthly_eia_data_to_shape['energy_source_code'] == 'SUN']

In [None]:
# Build the hourly profiles for the monthly EIA data.
# NOTE(review): the call in the "Test Hourly Profiles" section passes four arguments
# (monthly data, residual profiles, plant attributes, year); this call passes three.
# Confirm which one matches the current function signature.
hourly_profiles = impute_hourly_profiles.impute_missing_hourly_profiles(
    monthly_eia_data_to_shape, residual_profiles, year
)

In [None]:
# Convert report_date to datetime (residual_profiles was read without parse_dates in this section).
hourly_profiles['report_date'] = pd.to_datetime(hourly_profiles['report_date'])

In [None]:
# Show the solar profile rows for the ISNE balancing authority.
hourly_profiles[(hourly_profiles['fuel_category'] == 'solar') & (hourly_profiles['ba_code'] == 'ISNE')]

In [None]:
# Plot every BA's solar profile over local time, before the percent conversion in the next cell.
px.line(hourly_profiles[hourly_profiles['fuel_category'] == 'solar'], x='datetime_local', y='profile', color='ba_code')

In [None]:
# Convert the profiles with the module's convert_profile_to_percent helper.
# Note this overwrites `hourly_profiles`, so re-running the cell converts the data again.
hourly_profiles = impute_hourly_profiles.convert_profile_to_percent(hourly_profiles)

In [None]:
# Plot the solar profiles again, after the percent conversion in the previous cell.
px.line(hourly_profiles[hourly_profiles['fuel_category'] == 'solar'], x='datetime_local', y='profile', color='ba_code')

In [None]:

# Shape the monthly EIA data using the hourly profiles.
# NOTE(review): here the un-aggregated monthly frame is passed, whereas the
# "Test Hourly Profiles" section passes the BA/fuel-aggregated frame — confirm which is intended.
shaped_eia_data = impute_hourly_profiles.shape_monthly_eia_data_as_hourly(
    monthly_eia_data_to_shape, hourly_profiles
)

In [None]:
# specify columns containing monthly data that should be distributed to hourly
columns_to_shape = [
    "net_generation_mwh",
    "fuel_consumed_mmbtu",
    "fuel_consumed_for_electricity_mmbtu",
    "co2_mass_lb",
    "ch4_mass_lb",
    "n2o_mass_lb",
    "nox_mass_lb",
    "so2_mass_lb",
    "co2_mass_lb_for_electricity",
    "ch4_mass_lb_for_electricity",
    "n2o_mass_lb_for_electricity",
    "nox_mass_lb_for_electricity",
    "so2_mass_lb_for_electricity",
    "co2_mass_lb_adjusted",
    "ch4_mass_lb_adjusted",
    "n2o_mass_lb_adjusted",
    "nox_mass_lb_adjusted",
    "so2_mass_lb_adjusted",
]

# columns that identify one plant / subplant / month row, plus its static attributes
plant_month_keys = [
    "plant_id_eia",
    "subplant_id",
    "report_date",
    "plant_primary_fuel",
    "hourly_data_source",
    "fuel_category",
    "fuel_category_eia930",
    "ba_code",
    "ba_code_physical",
    "state",
    "distribution_flag",
]

# group eia data by plant
# Only the columns that are going to be shaped are summed. Summing every remaining
# column would also try to add up identifiers and text columns, which is either
# meaningless or an error depending on the column dtype and pandas version.
# dropna=False keeps rows whose key columns contain missing values.
shaped_monthly_data = (
    monthly_eia_data_to_shape.groupby(plant_month_keys, dropna=False)[columns_to_shape]
    .sum()
    .reset_index()
)

In [None]:
# List the distinct plant_primary_fuel values in the monthly data before grouping.
monthly_eia_data_to_shape.plant_primary_fuel.unique()

In [None]:
# List the distinct plant_primary_fuel values after grouping, to compare with the previous cell.
shaped_monthly_data.plant_primary_fuel.unique()

In [None]:


# Attach the matching hourly profile rows to every plant-month
shaped_monthly_data = shaped_monthly_data.merge(
    hourly_profiles, on=["report_date", "fuel_category", "ba_code"], how="left"
)

# Plant-months with negative net generation get a flat profile instead:
# an equal share for every hour of the report month.
has_negative_generation = shaped_monthly_data["net_generation_mwh"] < 0
hours_in_month = shaped_monthly_data["report_date"].dt.daysinmonth * 24
shaped_monthly_data.loc[has_negative_generation, "profile"] = 1 / hours_in_month
shaped_monthly_data.loc[
    has_negative_generation, "profile_method"
] = "flat_negative_generation"

# Shape the data: multiply each monthly value by the row's profile value
shaped_monthly_data[columns_to_shape] = shaped_monthly_data[columns_to_shape].multiply(
    shaped_monthly_data["profile"], axis="index"
)
shaped_monthly_data = shaped_monthly_data.drop(columns=["profile"])

# Final column order: identifiers and timestamps, then the shaped values
# (in the same order as columns_to_shape), then the provenance columns.
column_order = (
    [
        "plant_id_eia",
        "subplant_id",
        "datetime_local",
        "datetime_utc",
        "report_date",
    ]
    + columns_to_shape
    + [
        "profile_method",
        "hourly_data_source",
    ]
)
shaped_monthly_data = shaped_monthly_data[column_order]

In [None]:
# Show the solar rows of shaped_eia_data.
# Note this inspects `shaped_eia_data`, not the `shaped_monthly_data` built in the cells above.
shaped_eia_data[(shaped_eia_data['fuel_category'] == 'solar')]

# Combine all plant data together

In [None]:
# load data from csv
year = 2020
path_prefix = ''
cems = pd.read_csv(f'../data/outputs/{path_prefix}cems_{year}.csv')
partial_cems = pd.read_csv(f'../data/outputs/{path_prefix}partial_cems_scaled_{year}.csv')
# NOTE(review): every other output file name in this notebook has an underscore
# before the year; confirm whether this should be "shaped_eia923_data_{year}.csv".
shaped_eia_data = pd.read_csv(f'../data/outputs/{path_prefix}shaped_eia923_data{year}.csv')
# The plant attributes file is year-specific everywhere else in this notebook
# ("plant_static_attributes_{year}.csv"), so read the same file here.
plant_frame = pd.read_csv(f"../data/outputs/{path_prefix}plant_static_attributes_{year}.csv")


In [None]:
# check that none of the sources have overlapping subplant-months
columns_to_check_for_duplicates = ['plant_id_eia','subplant_id','report_date']

# one row per subplant-month for each source, flagged with a 1 in a source-named column
cems_subplant_months = cems[columns_to_check_for_duplicates].drop_duplicates().assign(cems=1)
partial_cems_subplant_months = (
    partial_cems[columns_to_check_for_duplicates].drop_duplicates().assign(partial_cems=1)
)
shaped_eia_subplant_months = (
    shaped_eia_data[columns_to_check_for_duplicates].drop_duplicates().assign(shaped_eia=1)
)

# outer-merge the three flag tables; a missing flag becomes 0
data_source_overlap = (
    cems_subplant_months.merge(
        partial_cems_subplant_months, how='outer', on=columns_to_check_for_duplicates
    )
    .merge(shaped_eia_subplant_months, how='outer', on=columns_to_check_for_duplicates)
    .fillna(0)
)

# check that there is no overlap between shaped eia and cems data
shaped_eia_cems_overlap = data_source_overlap[
    (data_source_overlap.shaped_eia == 1)
    & ((data_source_overlap.cems == 1) | (data_source_overlap.partial_cems == 1))
]

# check for overlap between cems and partial cems data
cems_partial_cems_overlap = data_source_overlap[
    (data_source_overlap.cems == 1) & (data_source_overlap.partial_cems == 1)
]

# A notebook cell only renders its last bare expression, so show all three tables
# explicitly; both overlap tables should be empty.
display(data_source_overlap, shaped_eia_cems_overlap, cems_partial_cems_overlap)

In [None]:
# Filter the full CEMS table against the partial CEMS table (overwrites `cems`).
cems = data_cleaning.filter_unique_cems_data(cems, partial_cems)

In [None]:
# Combine the CEMS, partial CEMS and shaped EIA tables into one frame and display it.
combined_plant_data = data_cleaning.combine_subplant_data(cems, partial_cems, shaped_eia_data)
combined_plant_data

In [None]:
# Load the BA reference table, keeping only each BA's code and local timezone.
ba_tz = load_data.load_ba_reference()[["ba_code", "timezone_local"]]


In [None]:
# List the columns of the per-BA table.
# NOTE(review): `ba_table` is first assigned inside the loop in the next cell, so
# this cell only works after that loop has already been run in the kernel.
ba_table.columns

In [None]:
# Write one csv of hourly power sector data per balancing authority (BA).
# NOTE(review): `ba_fuel_data` and `data_columns` are not defined anywhere in the
# visible notebook; they must already exist in the kernel for this cell to run.
ba_tz = load_data.load_ba_reference()[["ba_code", "timezone_local"]]
generated_emission_rate_columns = [
    "generated_co2_rate_lb_per_mwh_for_electricity",
    "generated_ch4_rate_lb_per_mwh_for_electricity",
    "generated_n2o_rate_lb_per_mwh_for_electricity",
    "generated_nox_rate_lb_per_mwh_for_electricity",
    "generated_so2_rate_lb_per_mwh_for_electricity",
    "generated_co2_rate_lb_per_mwh_adjusted",
    "generated_ch4_rate_lb_per_mwh_adjusted",
    "generated_n2o_rate_lb_per_mwh_adjusted",
    "generated_nox_rate_lb_per_mwh_adjusted",
    "generated_so2_rate_lb_per_mwh_adjusted",
]

for ba in list(ba_fuel_data.ba_code.unique()):

    # filter the data for a single BA
    ba_table = ba_fuel_data[ba_fuel_data["ba_code"] == ba].drop(columns="ba_code")

    # convert the datetime_utc column back to a datetime
    ba_table["datetime_utc"] = pd.to_datetime(ba_table["datetime_utc"], utc=True)

    # calculate a total for the BA
    # (select the data columns before summing so that fuel_category is never summed)
    ba_total = ba_table.groupby(["datetime_utc"])[data_columns].sum().reset_index()
    ba_total["fuel_category"] = "total"

    # concat the totals to the fuel-specific totals
    ba_table = pd.concat([ba_table, ba_total], axis=0, ignore_index=True)

    # round all values to two decimal places
    ba_table = ba_table.round(2)

    # emission rate = emitted mass / net generation, for each pollutant and each
    # accounting variant. 0/0 gives NaN, which is reported as a rate of 0; a
    # non-zero mass over zero generation gives +/-inf, which is reported as missing.
    # (np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.)
    for emission_type in ["_for_electricity", "_adjusted"]:
        for emission in ["co2", "ch4", "n2o", "nox", "so2"]:
            ba_table[f"generated_{emission}_rate_lb_per_mwh{emission_type}"] = (
                (
                    ba_table[f"{emission}_mass_lb{emission_type}"]
                    / ba_table["net_generation_mwh"]
                )
                .fillna(0)
                .replace(np.inf, np.nan)
                .replace(-np.inf, np.nan)
            )

    # create a local datetime column
    # (.item() raises unless exactly one reference row matches this BA)
    local_tz = ba_tz.loc[ba_tz["ba_code"] == ba, "timezone_local"].item()
    ba_table["datetime_local"] = ba_table["datetime_utc"].dt.tz_convert(local_tz)

    # re-order columns
    ba_table = ba_table[['fuel_category','datetime_local','datetime_utc'] + data_columns + generated_emission_rate_columns]

    # export to a csv
    ba_table.to_csv(
        f"../data/results/{path_prefix}power_sector_data/{ba}.csv", index=False
    )



In [None]:
# Simpler variant of the per-BA export in the previous cell (no local datetime
# column, no column re-ordering). It writes to the same file paths, so running
# both cells overwrites the previous cell's output.
# NOTE(review): `ba_fuel_data` and `data_columns` are not defined anywhere in the
# visible notebook; they must already exist in the kernel for this cell to run.
for ba in list(ba_fuel_data.ba_code.unique()):

    # filter the data for a single BA
    ba_table = ba_fuel_data[ba_fuel_data["ba_code"] == ba].drop(columns="ba_code")

    # convert the datetime_utc column back to a datetime
    ba_table["datetime_utc"] = pd.to_datetime(ba_table["datetime_utc"], utc=True)

    # calculate a total for the BA
    # (select the data columns before summing so that fuel_category is never summed)
    ba_total = ba_table.groupby(["datetime_utc"])[data_columns].sum().reset_index()
    ba_total["fuel_category"] = "total"

    # concat the totals to the fuel-specific totals
    ba_table = pd.concat([ba_table, ba_total], axis=0, ignore_index=True)

    # round all values to one decimal place
    ba_table = ba_table.round(1)

    # emission rate = emitted mass / net generation. 0/0 gives NaN, which is
    # reported as a rate of 0; +/-inf (zero generation) is reported as missing.
    # (np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.)
    for emission_type in ['_for_electricity','_adjusted']:
        for emission in ['co2','ch4','n2o','nox','so2']:
            ba_table[f"generated_{emission}_rate_lb_per_mwh{emission_type}"] = (
                    (ba_table[f"{emission}_mass_lb{emission_type}"] / ba_table["net_generation_mwh"])
                    .fillna(0)
                    .replace(np.inf, np.nan).replace(-np.inf, np.nan)
                )

    # export to a csv
    # index=False matches the export in the previous cell: the index is only a
    # row counter left over from the concat and should not become a column.
    ba_table.to_csv(f"../data/results/{path_prefix}power_sector_data/{ba}.csv", index=False)

# Test new functions

In [None]:
# Load the hourly CEMS data and the allocated EIA-923 data for testing new functions.
year = 2020
path_prefix = ''
# The other cells that read this same cems file parse its timestamp column as
# 'datetime_utc'; read_csv raises if a parse_dates column is missing from the file,
# so use the same column name here.
# NOTE(review): confirm the file no longer has an 'operating_datetime_utc' column.
cems = pd.read_csv(f'../data/outputs/{path_prefix}cems_{year}.csv', parse_dates=['datetime_utc','report_date'])
eia923_allocated = pd.read_csv(f'../data/outputs/{path_prefix}eia923_allocated_{year}.csv', parse_dates=['report_date'])

In [None]:
# List the distinct energy source codes present in the CEMS data.
cems.energy_source_code.unique()

In [None]:
# Show the CEMS rows whose energy source code is 'MSW'.
cems[cems['energy_source_code'] == 'MSW']

In [None]:

# Plot hourly fuel consumption for the rows whose energy source code is 'BLQ',
# one line per cems_id.
# The x column follows the 'datetime_utc' name used for the cems timestamp in the
# other sections of this notebook.
# NOTE(review): confirm that the cems output still has a 'cems_id' column.
px.line(cems[cems['energy_source_code'] == 'BLQ'], x='datetime_utc', y='fuel_consumed_mmbtu', color='cems_id')

In [None]:
# Load the plant attributes and attach them to every allocated EIA-923 row.
# The file is year-specific everywhere else in this notebook
# ("plant_static_attributes_{year}.csv"), so read the same file here.
plant_frame = pd.read_csv(f"../data/outputs/{path_prefix}plant_static_attributes_{year}.csv")
eia923_allocated = eia923_allocated.merge(plant_frame, how='left', on='plant_id_eia')

In [None]:
# 11. Assign hourly profile to monthly data
print('Assigning hourly profile to monthly EIA-923 data')
# create a separate dataframe containing only the generators for which we do not have CEMS data
monthly_eia_data_to_distribute = eia923_allocated[
    (eia923_allocated["hourly_data_source"] == "eia")
    & ~(eia923_allocated["fuel_consumed_mmbtu"].isna())
]
# load profile data and format for use in the pipeline
# The file name carries the path prefix and year, matching how the residual
# profiles are read in the other sections of this notebook.
# TODO: once this is in the pipeline (step 10), may not need to read file
hourly_profiles = pd.read_csv(
    f"../data/outputs/{path_prefix}residual_profiles_{year}.csv",
    parse_dates=["report_date"],
)


In [None]:
# BA-fuel combinations that have a profile available
available_profiles = hourly_profiles[['ba_code','fuel_category']].drop_duplicates()

# BA-fuel combinations present in the data to distribute (rows with a missing key are dropped)
ba_fuel_to_distribute = (
    monthly_eia_data_to_distribute[['ba_code','fuel_category']]
    .drop_duplicates()
    .dropna()
)

# Combinations found only on the "to distribute" side have no profile
missing_profiles = ba_fuel_to_distribute.merge(
    available_profiles, on=['ba_code','fuel_category'], how='outer', indicator='source'
).loc[lambda merged: merged.source == 'left_only']

missing_profiles.sort_values(by=['fuel_category','ba_code'])

In [None]:
# NOTE(review): `hourly_profiles` was assigned the result of pd.read_csv in an
# earlier cell, so `.load_hourly_profiles` is looked up on that DataFrame rather
# than on a module. This looks like a leftover from an earlier module name —
# confirm which function is intended.
hourly_profiles = hourly_profiles.load_hourly_profiles(monthly_eia_data_to_distribute, year)

In [None]:
# Count, for each fuel category, how many BA-fuel combinations use each profile method
# (combinations that do not occur are shown as 0).
print(hourly_profiles[['ba_code','fuel_category','profile_method']].drop_duplicates().pivot_table(index='fuel_category',columns='profile_method', aggfunc='count').fillna(0).astype(int))

# Investigate profile shapes

In [None]:
import plotly.express as px
import src.eia930 as eia930

In [None]:
# Load the raw EIA-930 'BALANCE' data for `year` and keep only the BA name, the
# UTC timestamp, and the demand, net generation and hydro generation columns.
hydro_demand = load_data.load_raw_eia930_data(year, 'BALANCE')
hydro_demand = hydro_demand[["Balancing Authority","datetime_utc","Demand (MW)","Net Generation (MW)","Net Generation (MW) from Hydropower and Pumped Storage",]]

In [None]:
# List the balancing authorities whose hydro generation sums to zero over the year.
# numeric_only=True keeps the sum to the MW columns; the frame also carries a
# datetime_utc column, which cannot be summed.
ba_totals = (
    hydro_demand.groupby("Balancing Authority").sum(numeric_only=True).reset_index()
)
bas_with_no_hydro = list(
    ba_totals.loc[
        ba_totals["Net Generation (MW) from Hydropower and Pumped Storage"] == 0,
        "Balancing Authority",
    ]
)

In [None]:
# Drop the balancing authorities identified as having no hydro generation.
hydro_demand = hydro_demand[~hydro_demand['Balancing Authority'].isin(bas_with_no_hydro)]

In [None]:
# Plot hydro generation over time, one line per balancing authority.
px.line(hydro_demand, x='datetime_utc', y='Net Generation (MW) from Hydropower and Pumped Storage', color='Balancing Authority')

In [None]:
# Correlation between demand and hydro generation within each balancing authority.
# groupby(...).corr() yields a 2x2 matrix per BA; keep only the 'Demand (MW)' row
# and drop its self-correlation column, leaving one demand-vs-hydro value per BA.
hydro_corr = hydro_demand.groupby("Balancing Authority")[["Demand (MW)","Net Generation (MW) from Hydropower and Pumped Storage"]].corr().reset_index()
hydro_corr = hydro_corr[hydro_corr['level_1'] == 'Demand (MW)'].drop(columns=["Demand (MW)","level_1"])
hydro_corr

In [None]:
# Load the EIA-930 data for `year` with the eia930 module's chalendar loader.
# NOTE(review): an earlier cell reads this file from
# "../data/downloads/eia930/chalendar/EBA_adjusted_elec.csv"; confirm which location is current.
cleaned_930 = eia930.load_chalendar_for_pipeline(
    "../data/outputs/EBA_adjusted_elec.csv", year=year
)

In [None]:
# List the distinct fuel categories in the cleaned 930 data.
# NOTE(review): an earlier section merges this loader's output on
# "fuel_category_eia930"; confirm that a plain "fuel_category" column exists.
cleaned_930.fuel_category.unique()

In [None]:
# Pick one fuel category and plot its 930 net generation over local time, one line per BA.
# NOTE(review): an earlier section merges this loader's output on
# "fuel_category_eia930"; confirm that a plain "fuel_category" column exists.
fuel = 'other'

data_to_plot = cleaned_930[cleaned_930['fuel_category'] == fuel]

px.line(data_to_plot, x='datetime_local', y='net_generation_mwh_930', color='ba_code')

In [None]:
# Display the rows selected for the chosen fuel category.
data_to_plot

In [None]:
# Reshape to one column per BA (indexed by local time) and compute the pairwise
# correlation of net generation between BAs for the chosen fuel.
data_to_plot.pivot(index='datetime_local', columns='ba_code', values='net_generation_mwh_930').corr()