In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Allow live package editing: load the autoreload extension and enable
# mode 2 so edited packages are re-imported without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import eemeter

# Load the hourly electricity sample: meter data, temperature data and the
# sample's metadata.
meter_data, temperature_data, sample_metadata = eemeter.load_sample(
    "il-electricity-cdd-hdd-hourly"
)

# The dates of the analysis "blackout" period during which the project was
# performed.
blackout_start_date = sample_metadata["blackout_start_date"]
blackout_end_date = sample_metadata["blackout_end_date"]

# Select meter data suitable for fitting a baseline model: it ends at the
# start of the blackout period and is capped at 365 days.
# NOTE: `warnings` is not inspected in the cells shown here, and the name is
# assigned again by the reporting-data call later in the notebook.
baseline_meter_data, warnings = eemeter.get_baseline_data(
    meter_data,
    end=blackout_start_date,
    max_days=365,
)

# Preliminary design matrix; it feeds the occupancy and segmentation steps.
preliminary_design_matrix = eemeter.create_caltrack_hourly_preliminary_design_matrix(
    baseline_meter_data,
    temperature_data,
)

# Segment the time series with the 'three_month_weighted' scheme (one segment
# per calendar month, so 12 models get fit). Every step from here on operates
# segment by segment.
segmentation = eemeter.segment_time_series(
    preliminary_design_matrix.index, 'three_month_weighted'
)

# Assign an occupancy status to each hour of the week (0-167).
occupancy_lookup = eemeter.estimate_hour_of_week_occupancy(
    preliminary_design_matrix, segmentation=segmentation
)


In [10]:
# Assign temperatures to bins. Two bin tables come back, one for occupied
# hours and one for unoccupied hours, fit using the segmentation and the
# occupancy lookup.
(
    occupied_temperature_bins,
    unoccupied_temperature_bins,
) = eemeter.fit_temperature_bins(
    preliminary_design_matrix,
    segmentation=segmentation,
    occupancy_lookup=occupancy_lookup,
)


In [11]:
# Show the occupied-hours bin table: rows are bin endpoints, columns are
# segments, and each cell is a boolean flag for that endpoint in that segment.
occupied_temperature_bins

Unnamed: 0_level_0,dec-jan-feb-weighted,jan-feb-mar-weighted,feb-mar-apr-weighted,mar-apr-may-weighted,apr-may-jun-weighted,may-jun-jul-weighted,jun-jul-aug-weighted,jul-aug-sep-weighted,aug-sep-oct-weighted,sep-oct-nov-weighted,oct-nov-dec-weighted,nov-dec-jan-weighted
bin_endpoints,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
30,True,False,False,False,False,False,False,False,False,False,False,False
45,False,True,True,False,False,False,False,False,False,False,False,False
55,False,False,False,True,False,False,False,False,False,False,False,False
65,False,False,False,False,True,True,False,False,False,False,False,False
75,False,False,False,False,False,False,True,True,True,False,False,False
90,False,False,False,False,False,False,False,False,False,False,False,False


In [12]:
# Show the unoccupied-hours bin table: rows are bin endpoints, columns are
# segments, and each cell is a boolean flag for that endpoint in that segment.
unoccupied_temperature_bins

Unnamed: 0_level_0,dec-jan-feb-weighted,jan-feb-mar-weighted,feb-mar-apr-weighted,mar-apr-may-weighted,apr-may-jun-weighted,may-jun-jul-weighted,jun-jul-aug-weighted,jul-aug-sep-weighted,aug-sep-oct-weighted,sep-oct-nov-weighted,oct-nov-dec-weighted,nov-dec-jan-weighted
bin_endpoints,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
30,True,True,True,True,False,False,False,False,False,True,True,True
45,True,True,True,True,True,False,False,False,True,True,True,True
55,True,True,True,True,True,True,False,False,True,True,True,True
65,False,True,True,True,True,True,True,True,True,True,True,True
75,False,False,True,True,True,True,True,True,True,True,True,False
90,False,False,False,False,True,True,True,True,True,False,False,False


In [44]:

# Build one design matrix per segment from the preliminary matrix, the
# segmentation, the occupancy lookup and both temperature-bin tables.
segmented_design_matrices = eemeter.create_caltrack_hourly_segmented_design_matrices(
    preliminary_design_matrix,
    segmentation,
    occupancy_lookup,
    occupied_temperature_bins,
    unoccupied_temperature_bins,
)

# Fit the CalTRACK hourly baseline model on the segmented design matrices.
baseline_model = eemeter.fit_caltrack_hourly_model(
    segmented_design_matrices,
    occupancy_lookup,
    occupied_temperature_bins,
    unoccupied_temperature_bins,
)

# Get a year (max_days=365) of reporting-period data, starting at the end of
# the blackout period. This reassigns `warnings`, replacing the value returned
# earlier by the baseline-data call.
reporting_meter_data, warnings = eemeter.get_reporting_data(
    meter_data,
    start=blackout_end_date,
    max_days=365,
)

# Compute metered savings for the selected reporting year, requesting the
# disaggregated output as well.
metered_savings_dataframe, error_bands = eemeter.metered_savings(
    baseline_model,
    reporting_meter_data,
    temperature_data,
    with_disaggregated=True,
)

# Total metered savings: the sum of the `metered_savings` column.
total_metered_savings = metered_savings_dataframe.metered_savings.sum()

In [45]:
# Exploration: list the attributes available on the fitted baseline model.
dir(baseline_model)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'avgs_metrics',
 'json',
 'metadata',
 'method_name',
 'model',
 'predict',
 'settings',
 'status',
 'totals_metrics',

In [46]:
# Exploration: list the attributes of the object stored in `baseline_model.model`.
dir(baseline_model.model)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'json',
 'model_lookup',
 'occupancy_lookup',
 'occupied_temperature_bins',
 'predict',
 'prediction_feature_processor',
 'prediction_feature_processor_kwargs',
 'prediction_segment_name_mapping',
 'prediction_segment_type',
 'segment_models',
 'unoccupied_temperature_bins']

In [47]:
# Show the lookup that maps each month abbreviation to its segment model.
baseline_model.model.model_lookup

{'jan': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f61577a99e8>,
 'feb': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f6157df26d8>,
 'mar': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f61577fec88>,
 'apr': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f61577fb8d0>,
 'may': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f61577dee80>,
 'jun': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f61577cb128>,
 'jul': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f6157f24b38>,
 'aug': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f61576948d0>,
 'sep': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f6157675160>,
 'oct': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f615766b438>,
 'nov': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f6157604f98>,
 'dec': <eemeter.segmentation.CalTRACKSegmentModel at 0x7f6157678c88>}

In [48]:
# Pull out the segment model registered under 'jan'.
baseline_model.model.model_lookup['jan']

<eemeter.segmentation.CalTRACKSegmentModel at 0x7f61577a99e8>

In [49]:
# Exploration: list the attributes of the 'jan' segment model.
dir(baseline_model.model.model_lookup['jan'])

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'formula',
 'json',
 'model',
 'model_params',
 'predict',
 'segment_name',

In [50]:
# Show the 'jan' segment model's parameters: a dict of values keyed by formula term.
baseline_model.model.model_lookup['jan'].model_params

{'C(hour_of_week)[0]': 2.680522443450201,
 'C(hour_of_week)[1]': 2.3104981801257716,
 'C(hour_of_week)[2]': 2.163083506797982,
 'C(hour_of_week)[3]': 2.8071224205289655,
 'C(hour_of_week)[4]': 3.108439020793594,
 'C(hour_of_week)[5]': 3.6143777335747553,
 'C(hour_of_week)[6]': 3.24630663833209,
 'C(hour_of_week)[7]': 3.450247415696197,
 'C(hour_of_week)[8]': 2.358429902471026,
 'C(hour_of_week)[9]': 2.8945018111775203,
 'C(hour_of_week)[10]': 3.5071905796750045,
 'C(hour_of_week)[11]': 3.608558986916206,
 'C(hour_of_week)[12]': 4.67951303179273,
 'C(hour_of_week)[13]': 4.726219907285297,
 'C(hour_of_week)[14]': 2.801229612879875,
 'C(hour_of_week)[15]': 2.958053098311373,
 'C(hour_of_week)[16]': 3.260221410032224,
 'C(hour_of_week)[17]': 3.182853717815694,
 'C(hour_of_week)[18]': 3.3166084409881478,
 'C(hour_of_week)[19]': 3.340781018574744,
 'C(hour_of_week)[20]': 3.093707885398569,
 'C(hour_of_week)[21]': 3.884584924250212,
 'C(hour_of_week)[22]': 3.4484824982857836,
 'C(hour_of_week

In [51]:
# Show the formula string of the 'jan' segment model.
baseline_model.model.model_lookup['jan'].formula

'meter_value ~ C(hour_of_week) - 1 + bin_0_occupied + bin_1_occupied + bin_0_unoccupied + bin_1_unoccupied + bin_2_unoccupied + bin_3_unoccupied'