# Create Load Control Time Series
Transforms the indexed load control periods into a time series (one row per meter sample index).  This makes it straightforward to join with the meter data.

In [0]:
%run ../Utilities/ConfigUtilities

In [0]:
from pyspark.sql.functions import concat_ws, lit, col, when
import pandas as pd

# Diagnostic switch: any truthy value enables the `if debug:` guarded display()/print() output in the cells below.
debug = 1

In [0]:
# Load the indexed load control periods from the Delta data at LOAD_CONTROL_INDEX_PATH.
lc_df = spark.read.load(LOAD_CONTROL_INDEX_PATH, format="delta")

# Show the raw input when diagnostics are switched on.
if debug:
    display(lc_df)

### Create time series data set with the following steps:
- Subset down to event ID, resource name, start index, and end index.
- Loop through each event to create a time series dataframe with the meter sample indexes.
- Save

In [0]:
# Shift the window relative to the actual start / stop index of each load control period.
# The calendar is based on 5 minute intervals, so there are 12 indices per hour.
# The offsets can be used in several ways:
#   - To target only the load control period, offset the start by +1 (to avoid an issue
#     with the join of the end sample) and the end by 0.
#   - To avoid the ramp up/down inside the load control period, offset the start by a
#     positive number and the end by a negative number.
#   - To include data points before and after the load control period, offset the start
#     by a negative number and the end by a positive number.
start_offset = -48  # 4 hours
end_offset = 48     # 4 hours

# Keep the event identifiers and original bounds, and append the shifted bounds in one projection.
lc_sub_df = lc_df.select(
    'LoadControlEventID',
    'ResourceName',
    'StartMeterSampleIndex',
    'EndMeterSampleIndex',
    (col('StartMeterSampleIndex') + start_offset).alias('StartOffsetIndex'),
    (col('EndMeterSampleIndex') + end_offset).alias('EndOffsetIndex'),
)

if debug:
    display(lc_sub_df)

In [0]:
# The data is not that big, so let's do this in pandas.
# toPandas() collects every row of lc_sub_df onto the driver.
lc_sub_pdf = lc_sub_df.toPandas()

if debug:
    display(lc_sub_pdf)

In [0]:
# Expand every load control event into one row per meter sample index, covering
# StartOffsetIndex through EndOffsetIndex (both ends inclusive).
ts_columns = ['LoadControlEventID', 'ResourceName', 'MeterSampleIndex']

# Collect the per-event frames and concatenate once after the loop.  Calling pd.concat
# inside the loop re-copies all previously accumulated rows on every iteration.
event_frames = []
for position, event in enumerate(lc_sub_pdf.itertuples(index=False)):
    event_frames.append(
        pd.DataFrame(
            {
                # Scalars are broadcast to the length of the index range.
                'LoadControlEventID': event.LoadControlEventID,
                'ResourceName': event.ResourceName,
                'MeterSampleIndex': range(event.StartOffsetIndex, event.EndOffsetIndex + 1),
            },
            columns=ts_columns,
        )
    )

    # Progress / probe output is diagnostic only, so it follows the debug switch
    # like the other cells in this notebook.
    if debug:
        print(event.LoadControlEventID)

        if event.LoadControlEventID == "LREC.IRR_2025-06-02 16:00:00_2025-06-02 20:00:00":
            print("Found it")
            print(event.LoadControlEventID)
            print(str(position))

# pd.concat raises on an empty list, so fall back to an empty frame with the target columns.
if event_frames:
    lc_sub_ts_pdf = pd.concat(event_frames, ignore_index=True)
else:
    lc_sub_ts_pdf = pd.DataFrame(columns=ts_columns)

if debug:
    display(lc_sub_ts_pdf)

In [0]:
# Hand the expanded pandas frame back to Spark for the joins that follow.
lc_sub_ts_df = spark.createDataFrame(lc_sub_ts_pdf)

if debug:
    # Rows belonging to the single event that is being spot-checked.
    spot_check_rows = lc_sub_ts_df.where(
        col('LoadControlEventID') == "LREC.IRR_2025-06-02 16:00:00_2025-06-02 20:00:00"
    )

    print(lc_sub_ts_df.count())
    display(lc_sub_ts_df)
    display(spot_check_rows)


In [0]:
# Attach each event's bounds to its time series rows so the join can be inspected.
# NOTE(review): when the cells run top to bottom, lc_sub_df still carries ResourceName here,
# so that column is present on both sides of the join - confirm this is acceptable for a
# debug-only view.
lc_sub_lc_ts_df = lc_sub_ts_df.join(lc_sub_df, how='inner', on='LoadControlEventID')

if debug:
    # Time series rows whose start index is null after the join.
    missing_start_rows = lc_sub_lc_ts_df.where(col('StartMeterSampleIndex').isNull())
    # Rows for the single event that is being spot-checked.
    spot_check_joined = lc_sub_lc_ts_df.where(
        col('LoadControlEventID') == "LREC.IRR_2025-06-02 16:00:00_2025-06-02 20:00:00"
    )

    display(missing_start_rows)
    display(spot_check_joined)

In [0]:
# Identify intervals that are within the load control period.  Steps to do this:
#   - Join the load control time series with the original data containing the start / stop index.
#   - Add a LoadControlEvent flag (1) to all indices within the start / stop index, 0 otherwise.
#   - Remove the extra columns (done in the next cell).
#
# lc_sub_df is rebound here to only the key and the un-offset bounds, so the join adds
# exactly the two columns needed for the flag.  The key is spelled 'LoadControlEventID',
# matching every other reference in this notebook, so the select does not depend on
# Spark's case-insensitive column resolution.
lc_sub_df = lc_df.select('LoadControlEventID', 'StartMeterSampleIndex', 'EndMeterSampleIndex')
lc_sub_lc_ts_df = lc_sub_ts_df.join(lc_sub_df, on='LoadControlEventID', how='inner')

# The start index is exclusive and the end index is inclusive.
in_control_period = (
    (col('MeterSampleIndex') > col('StartMeterSampleIndex'))
    & (col('MeterSampleIndex') <= col('EndMeterSampleIndex'))
)

lc_sub_lc_ts_df = lc_sub_lc_ts_df.withColumn(
    'LoadControlEvent',
    when(in_control_period, lit(1)).otherwise(lit(0)),
)

if debug:
    display(lc_sub_lc_ts_df)
    display(lc_sub_lc_ts_df.filter(col('LoadControlEventID') == "LREC.IRR_2025-06-02 16:00:00_2025-06-02 20:00:00"))



In [0]:
# Remove the start / end bounds; only the time series rows and the LoadControlEvent flag are kept.
helper_columns = ('StartMeterSampleIndex', 'EndMeterSampleIndex')
lc_sub_lc_ts_df = lc_sub_lc_ts_df.drop(*helper_columns)

In [0]:
# Save the data to the silver enhanced tier, replacing whatever is already at the target path.
lc_sub_lc_ts_df.write.parquet(LOAD_CONTROL_TIMESERIES_PATH, mode='overwrite')

In [0]:
# Vacuum the output location.
# NOTE(review): this path is written as Parquet, while the input was read as Delta -
# confirm VACUUM on this path behaves as intended in the target environment.
vacuum_statement = f"VACUUM '{LOAD_CONTROL_TIMESERIES_PATH}'"
spark.sql(vacuum_statement)