Skip to content

Commit

Permalink
Merge pull request #458 from openeemeter/update-docker-and-dependencies
Browse files Browse the repository at this point in the history
Updated Dockerfile and Pipfile.lock and fixed tests
  • Loading branch information
philngo-recurve committed Mar 11, 2023
2 parents be460bf + d90999c commit 6d31d34
Show file tree
Hide file tree
Showing 18 changed files with 2,040 additions and 1,186 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@ Changelog
Development
-----------

* Placeholder
* Update python version in Dockerfile.
* Update other dependencies (including adding rust) in Dockerfile.
* Remove pinned dependencies in Pipfile.
* Relock Pipfile (and do so inside of the docker image).
* Update pytests to account for changes in newer pandas where categorical variables are no longer included in `df.sum().sum()`.



3.1.1
-----
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
FROM python:3.6.6
FROM python:3.10

RUN set -ex && pip install pip pipenv --upgrade

# sphinxcontrib-spelling dependency
RUN apt-get update \
&& apt-get install -yqq libenchant-dev
&& apt-get install -yqq libenchant-2-dev

COPY Pipfile Pipfile
COPY Pipfile.lock Pipfile.lock
Expand Down
14 changes: 7 additions & 7 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@ name = "pypi"

[packages]

click = "==7.0"
eeweather = ">=0.3.12"
click = "*"
matplotlib = "*"
statsmodels = "==0.11.1"
scipy = "==1.4.1"
sqlalchemy = "*"
pandas = "==0.25.2"
statsmodels = "*"
scipy = "*"
pandas = "*"


[dev-packages]

black = "==18.6b4"
sqlalchemy = "*"
eeweather = ">=0.3.12"
black = "*"
coverage = "*"
jupyterlab = "*"
nbsphinx = "*"
Expand Down
3,009 changes: 1,910 additions & 1,099 deletions Pipfile.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ help:

# Custom target for autobuild (philngo)
livehtml:
sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)"/html -H 0.0.0.0 -p 8000 --poll -z ../eemeter
sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)"/html --host 0.0.0.0 --port 8000 --watch ../eemeter

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,4 @@


def setup(app):
app.add_stylesheet("css/custom.css") # may also be an URL
app.add_css_file("css/custom.css") # may also be a URL
7 changes: 4 additions & 3 deletions eemeter/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,11 @@ def cli():
def _get_data(
sample, meter_file, temperature_file, heating_balance_points, cooling_balance_points
):

if sample is not None:
with resource_stream("eemeter.samples", "metadata.json") as f:
metadata = json.loads(f.read().decode("utf-8"))
if sample in metadata:
click.echo("Loading sample: {}".format(sample))

meter_file = resource_stream(
"eemeter.samples", metadata[sample]["meter_data_filename"]
)
Expand Down Expand Up @@ -106,7 +104,10 @@ def _get_data(
heating_balance_points=heating_balance_points,
cooling_balance_points=cooling_balance_points,
)
return merge_features([usage_per_day, temperature_features])
merged_features = merge_features([usage_per_day, temperature_features])
# usage column must be `meter_value` for model fitting to work
merged_features.rename(columns={"usage_per_day": "meter_value"}, inplace=True)
return merged_features


@cli.command()
Expand Down
8 changes: 4 additions & 4 deletions eemeter/derivatives.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _compute_ols_error(
):
ols_model_agg_error = (
(t_stat * rmse_base_residuals * post_obs)
/ (base_obs ** 0.5)
/ (base_obs**0.5)
* (1.0 + ((base_avg - post_avg) ** 2.0 / base_var)) ** 0.5
)

Expand All @@ -46,7 +46,7 @@ def _compute_ols_error(
)

ols_total_agg_error = (
ols_model_agg_error ** 2.0 + ols_noise_agg_error ** 2.0
ols_model_agg_error**2.0 + ols_noise_agg_error**2.0
) ** 0.5

return ols_total_agg_error, ols_model_agg_error, ols_noise_agg_error
Expand Down Expand Up @@ -75,7 +75,7 @@ def _compute_fsu_error(

fsu_error_band = total_base_energy * (
t_stat
* (a_coeff * months_reporting ** 2.0 + b_coeff * months_reporting + c_coeff)
* (a_coeff * months_reporting**2.0 + b_coeff * months_reporting + c_coeff)
* (rmse_base_residuals / base_avg)
* ((base_obs / nprime) * (1.0 + (2.0 / nprime)) * (1.0 / post_obs)) ** 0.5
)
Expand Down Expand Up @@ -372,7 +372,7 @@ def _compute_error_bands_modeled_savings(
"FSU Error Band: Baseline": fsu_error_band_baseline,
"FSU Error Band: Reporting": fsu_error_band_reporting,
"FSU Error Band": (
fsu_error_band_baseline ** 2.0 + fsu_error_band_reporting ** 2.0
fsu_error_band_baseline**2.0 + fsu_error_band_reporting**2.0
)
** 0.5,
}
Expand Down
3 changes: 0 additions & 3 deletions eemeter/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,6 @@ def _compute_columns(temps):
def _compute_columns(temps):
count = temps.shape[0]
if count > 24:

day_groups = np.floor(np.arange(count) / 24)
daily_temps = temps.groupby(day_groups).agg(["mean", "count"])
n_limit_period = percent_hourly_coverage_per_billing_period * count
Expand Down Expand Up @@ -267,7 +266,6 @@ def _compute_columns(temps):
for bp in heating_balance_points
}
else: # faster route for daily case, should have same effect.

if count > n_limit_daily:
count_cols = {"n_days_kept": 1, "n_days_dropped": 0}
# CalTRACK 2.2.2.3
Expand Down Expand Up @@ -833,7 +831,6 @@ def compute_temperature_bin_features(temperatures, bin_endpoints):
bins = {}

for i, (left_bin, right_bin) in enumerate(zip(bin_endpoints, bin_endpoints[1:])):

bin_name = "bin_{}".format(i)

in_bin = (temperatures > left_bin) & (temperatures <= right_bin)
Expand Down
23 changes: 12 additions & 11 deletions eemeter/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,12 +384,10 @@ def __init__(
or self.degrees_of_freedom < 1
or self.observed_length < self.num_parameters
):

self.cvrmse_auto_corr_correction = None
self.approx_factor_auto_corr_correction = None
self.fsu_base_term = None
else:

# factor to correct cvrmse_adj for autocorrelation of inputs
# i.e., divide by (n' - n_param) instead of by (n - n_param)
self.cvrmse_auto_corr_correction = (
Expand All @@ -411,15 +409,18 @@ def __init__(
)

def __repr__(self):
return "ModelMetrics(merged_length={}, r_squared_adj={}, cvrmse_adj={}, " "mape_no_zeros={}, nmae={}, nmbe={}, autocorr_resid={}, confidence_level={})".format(
self.merged_length,
round(self.r_squared_adj, 3),
round(self.cvrmse_adj, 3),
round(self.mape_no_zeros, 3),
round(self.nmae, 3),
round(self.nmbe, 3),
round(self.autocorr_resid, 3),
round(self.confidence_level, 3),
return (
"ModelMetrics(merged_length={}, r_squared_adj={}, cvrmse_adj={}, "
"mape_no_zeros={}, nmae={}, nmbe={}, autocorr_resid={}, confidence_level={})".format(
self.merged_length,
round(self.r_squared_adj, 3),
round(self.cvrmse_adj, 3),
round(self.mape_no_zeros, 3),
round(self.nmae, 3),
round(self.nmbe, 3),
round(self.autocorr_resid, 3),
round(self.confidence_level, 3),
)
)

def json(self):
Expand Down
10 changes: 7 additions & 3 deletions tests/test_caltrack_design_matrices.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@


def test_create_caltrack_hourly_preliminary_design_matrix(
il_electricity_cdd_hdd_hourly
il_electricity_cdd_hdd_hourly,
):
meter_data = il_electricity_cdd_hdd_hourly["meter_data"]
temperature_data = il_electricity_cdd_hdd_hourly["temperature_data"]
Expand All @@ -47,6 +47,8 @@ def test_create_caltrack_hourly_preliminary_design_matrix(
"n_hours_kept",
"temperature_mean",
]
# In newer pandas, categorical columns (like hour_of_week) aren't included in sum
design_matrix.hour_of_week = design_matrix.hour_of_week.astype(float)
assert round(design_matrix.sum().sum(), 2) == 136352.61


Expand Down Expand Up @@ -386,6 +388,7 @@ def test_create_caltrack_hourly_segmented_design_matrices(
"meter_value",
"weight",
]
design_matrix.hour_of_week = design_matrix.hour_of_week.astype(float)
assert round(design_matrix.sum().sum(), 2) == 126210.07

design_matrix = design_matrices["mar-apr-may-weighted"]
Expand All @@ -397,11 +400,12 @@ def test_create_caltrack_hourly_segmented_design_matrices(
"meter_value",
"weight",
]
design_matrix.hour_of_week = design_matrix.hour_of_week.astype(float)
assert round(design_matrix.sum().sum(), 2) == 167659.28


def test_create_caltrack_billing_design_matrix_empty_temp(
il_electricity_cdd_hdd_billing_monthly
il_electricity_cdd_hdd_billing_monthly,
):
meter_data = il_electricity_cdd_hdd_billing_monthly["meter_data"]
temperature_data = il_electricity_cdd_hdd_billing_monthly["temperature_data"][:0]
Expand All @@ -412,7 +416,7 @@ def test_create_caltrack_billing_design_matrix_empty_temp(


def test_create_caltrack_billing_design_matrix_partial_empty_temp(
il_electricity_cdd_hdd_billing_monthly
il_electricity_cdd_hdd_billing_monthly,
):
meter_data = il_electricity_cdd_hdd_billing_monthly["meter_data"]
temperature_data = il_electricity_cdd_hdd_billing_monthly["temperature_data"][:200]
Expand Down
4 changes: 3 additions & 1 deletion tests/test_caltrack_hourly.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def test_caltrack_hourly_fit_feature_processor(
"weight",
]
assert result.shape == (24, 10)
result.hour_of_week = result.hour_of_week.astype(float)
assert round(result.sum().sum(), 2) == 5916.0


Expand Down Expand Up @@ -127,6 +128,7 @@ def test_caltrack_hourly_prediction_feature_processor(
"weight",
]
assert result.shape == (24, 9)
result.hour_of_week = result.hour_of_week.astype(float)
assert round(result.sum().sum(), 2) == 4956.0


Expand Down Expand Up @@ -458,7 +460,7 @@ def segmented_design_matrices_single_mode(


def test_fit_caltrack_hourly_model_segment_single_mode(
segmented_design_matrices_single_mode
segmented_design_matrices_single_mode,
):
segment_name = "dec-jan-feb-weighted"
segment_data = segmented_design_matrices_single_mode[segment_name]
Expand Down
14 changes: 8 additions & 6 deletions tests/test_caltrack_usage_per_day.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ def cdd_hdd_h54_c67_billing_monthly_totals(il_electricity_cdd_hdd_billing_monthl


def test_caltrack_predict_design_matrix_input_avg_false_output_avg_true(
cdd_hdd_h54_c67_billing_monthly_totals
cdd_hdd_h54_c67_billing_monthly_totals,
):
data = cdd_hdd_h54_c67_billing_monthly_totals
prediction = _caltrack_predict_design_matrix(
Expand All @@ -694,7 +694,7 @@ def test_caltrack_predict_design_matrix_input_avg_false_output_avg_true(


def test_caltrack_predict_design_matrix_input_avg_false_output_avg_false(
cdd_hdd_h54_c67_billing_monthly_totals
cdd_hdd_h54_c67_billing_monthly_totals,
):
data = cdd_hdd_h54_c67_billing_monthly_totals
prediction = _caltrack_predict_design_matrix(
Expand Down Expand Up @@ -730,7 +730,7 @@ def cdd_hdd_h54_c67_billing_monthly_avgs(il_electricity_cdd_hdd_billing_monthly)


def test_caltrack_predict_design_matrix_input_avg_true_output_avg_false(
cdd_hdd_h54_c67_billing_monthly_avgs
cdd_hdd_h54_c67_billing_monthly_avgs,
):
data = cdd_hdd_h54_c67_billing_monthly_avgs
prediction = _caltrack_predict_design_matrix(
Expand All @@ -750,7 +750,7 @@ def test_caltrack_predict_design_matrix_input_avg_true_output_avg_false(


def test_caltrack_predict_design_matrix_input_avg_true_output_avg_true(
cdd_hdd_h54_c67_billing_monthly_avgs
cdd_hdd_h54_c67_billing_monthly_avgs,
):
data = cdd_hdd_h54_c67_billing_monthly_avgs
prediction = _caltrack_predict_design_matrix(
Expand Down Expand Up @@ -792,7 +792,7 @@ def test_caltrack_predict_design_matrix_n_days(cdd_hdd_h54_c67_billing_monthly_t


def test_caltrack_predict_design_matrix_no_days_fails(
cdd_hdd_h54_c67_billing_monthly_totals
cdd_hdd_h54_c67_billing_monthly_totals,
):
# This makes sure that the method fails if neither n_days nor
# a DatetimeIndex is available.
Expand Down Expand Up @@ -1443,7 +1443,9 @@ def test_select_best_candidate_ok(
assert best_candidate.r_squared_adj == 1


def test_select_best_candidate_none(candidate_model_disqualified,):
def test_select_best_candidate_none(
candidate_model_disqualified,
):
candidates = [candidate_model_disqualified]

best_candidate, warnings = select_best_candidate(candidates)
Expand Down

0 comments on commit 6d31d34

Please sign in to comment.