# Create Coincident Load By Load Control Meter
Finds the usage of every meter in a load control program during the hour of the GRE monthly coincident peak load.

In [0]:
%run ../../Utilities/ConfigUtilities

In [0]:
# Set up the environment using a function in ConfigUtilities (loaded by the %run cell above).
set_spark_config()

In [0]:
# NOTE(review): importing `sum` from pyspark.sql.functions rebinds the Python builtin `sum`
# for the rest of the notebook; it does not appear to be used in the cells visible here.
from pyspark.sql.functions import col, concat, lit, sum, count

# Truthy value turns on the display()/print() diagnostics guarded by `if debug:` in later cells.
debug = 1

In [0]:
# Load the meter load control type assignments from CSV; the first row holds the column names.
load_types_df = spark.read.option("header", True).csv(METER_CONTROL_TYPES_PATH)

# Diagnostics: show the rows, then the total row count.
if debug:
    display(load_types_df)
    print(load_types_df.count())

In [0]:
# Summarize the load control programs found on meter position 1, largest program first.
# Some of these may not be dedicated meters and should be removed from the CP analysis.
lc_pos1_df = (
    load_types_df
    .where(col('BI_MTR_POS_NBR') == "1")
    .groupBy("BI_LOAD_TYPE", "BI_LOAD_TYPE_DESC")
    .agg(count('*').alias("MeterCount"))
    .orderBy(col("MeterCount").desc())
)

display(lc_pos1_df)

In [0]:
# Remove position 1 meters in selected load types.  These are either credit accounts or cycled
# loads that should not be included in the CP analysis.
#
# The load types are tested with isin() so that the position-1 condition applies to every
# excluded load type.  The previous form, `pos == "1" & type == "CAC" | type == "CA1"`, was
# evaluated as `(pos & CAC) | CA1` because `&` binds tighter than `|`, which dropped CA1 rows
# at every meter position rather than only at position 1.
excluded_pos1_load_types = ["CAC", "CA1"]
is_excluded_pos1 = (col('BI_MTR_POS_NBR') == "1") & col('BI_LOAD_TYPE').isin(excluded_pos1_load_types)

load_types_filter_df = load_types_df.filter(~is_excluded_pos1)

if debug:
    print(load_types_filter_df.count())
    display(load_types_filter_df)

In [0]:
from pyspark.sql.functions import collect_list, concat_ws, sort_array

# Build one row per meter (BI_MTR_NBR) with its load types joined into a comma-separated
# string, BI_LOAD_TYPE_LIST.
#
# * The aggregation reads load_types_filter_df so that the position 1 exclusions computed in
#   the previous cell actually take effect; aggregating load_types_df ignored that filter.
#   NOTE(review): confirm the excluded meters are meant to drop out of the CP output.
# * The list is sorted with sort_array() inside the aggregation.  Sorting the DataFrame before
#   groupBy() does not guarantee the order in which collect_list() sees the rows after the
#   shuffle, so the concatenated string could differ between runs.
load_types_agg_df = (
    load_types_filter_df
    .groupBy("BI_MTR_NBR")
    .agg(concat_ws(",", sort_array(collect_list("BI_LOAD_TYPE"))).alias("BI_LOAD_TYPE_LIST"))
)

if debug:
    print(load_types_agg_df.count())
    display(load_types_agg_df)

In [0]:
# Load the hourly meter data from the Delta table stored at MDM_HOURLY_PATH.
hourly_df = spark.read.load(MDM_HOURLY_PATH, format="delta")

if debug:
    display(hourly_df)

In [0]:
# Load the coincidental peak index from the Delta table stored at COINCIDENTAL_LOAD_INDEX_PATH.
coincid_peak_df = spark.read.load(COINCIDENTAL_LOAD_INDEX_PATH, format="delta")

if debug:
    display(coincid_peak_df)

In [0]:
# Keep only the hourly readings taken at a coincidental peak hour: the (inner) join matches
# each reading's EndMeterSampleIndex against MeterSampleIndex in the peak index.  The listed
# columns are then dropped from the joined result.
hourly_coincid_df = (
    hourly_df
    .join(coincid_peak_df, on=hourly_df["EndMeterSampleIndex"] == coincid_peak_df["MeterSampleIndex"])
    .drop('MeterSampleIndex', 'LocalYear', 'LocalMonth', 'LocalDay', 'HourEnding')
)

if debug:
    display(hourly_coincid_df)

In [0]:
# Restrict the peak-hour readings to load control meters.  The inner join keeps only readings
# whose MeterNumber has a matching BI_MTR_NBR in the aggregated load type table.
hourly_coincid_lc_df = hourly_coincid_df.join(
    load_types_agg_df,
    on=hourly_coincid_df["MeterNumber"] == load_types_agg_df["BI_MTR_NBR"],
    how='inner',
)

if debug:
    display(hourly_coincid_lc_df)

In [0]:
# Add a "YearMonth" label built as <CoincidYear>-<CoincidMonth>.
hourly_coincid_lc_df = hourly_coincid_lc_df.withColumn(
    "YearMonth",
    concat(hourly_coincid_lc_df["CoincidYear"], lit("-"), hourly_coincid_lc_df["CoincidMonth"]),
)

In [0]:
# Persist the load control peak usage to the gold tier as a Delta table, replacing any
# existing contents at the target path.
(
    hourly_coincid_lc_df.write
    .format("delta")
    .mode("overwrite")
    .save(COINCIDENTAL_PEAK_LOADCONTROL_USAGE_PATH)
)

In [0]:
# Run Delta VACUUM on the table just written.  VACUUM deletes data files that the table no
# longer references and that are older than the retention threshold; it does not remove the
# table's commit history.
spark.sql(
    f"VACUUM '{COINCIDENTAL_PEAK_LOADCONTROL_USAGE_PATH}'"
)

In [0]:
# Build a YearMonth dimension table for use in PowerBI: one row per distinct
# (CoincidYear, CoincidMonth, YearMonth) combination present in the usage data.
yearmonth_df = (
    hourly_coincid_lc_df
    .select("CoincidYear", "CoincidMonth", "YearMonth")
    .distinct()
)


In [0]:
# Attach the demand rates to the YearMonth dimension so they are available to all other
# datasets in PowerBI.  The rates CSV has a header row and its column types are inferred.
demand_rate_df = spark.read.options(header=True, inferSchema=True).csv(DEMAND_RATE_PATH)

# Inner join on month number; the rate table's "Month" column is dropped afterwards because
# CoincidMonth already carries the same value.
yearmonth_demand_df = (
    yearmonth_df
    .join(demand_rate_df, on=yearmonth_df["CoincidMonth"] == demand_rate_df["Month"], how='inner')
    .drop("Month")
)

if debug:
    display(yearmonth_demand_df)


In [0]:
# Persist the YearMonth dimension (with demand rates) as a Delta table, replacing any
# existing contents at the target path.
(
    yearmonth_demand_df.write
    .format("delta")
    .mode("overwrite")
    .save(COINCIDENTAL_PEAK_DIM_YEARMONTH_PATH)
)

In [0]:
# Run Delta VACUUM on the dimension table just written.  VACUUM deletes data files that the
# table no longer references and that are older than the retention threshold; it does not
# remove the table's commit history.
spark.sql(
    f"VACUUM '{COINCIDENTAL_PEAK_DIM_YEARMONTH_PATH}'"
)