# HVAC optimization case study
## Validation

### Setup

In [1]:
from pathlib import Path
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.metrics import classification_report

In [2]:
# Project directories, resolved relative to the parent of the current working
# directory (assumes the notebook is launched from a direct subfolder of the
# project root -- TODO confirm).
root_path = Path.cwd().parent
processed_data_path = root_path / "data" / "processed"  # processed CSV inputs
model_path = root_path / "models"  # directory the model is loaded from

### Load data

In [3]:
# Read the processed validation data (valid_processed.csv) into a DataFrame.
val_df = pd.read_csv(processed_data_path / "valid_processed.csv")

### Preprocessing

In [4]:
# Name of the label column.
target = "occupancy"

# Date/time feature columns.
date_features = ["year", "month", "day", "weekday", "hour", "minute", "week"]

# Columns passed to the model at prediction time: the date/time columns
# followed by temperature, humidity, light and humidityratio.
org_features = [*date_features, "temperature", "humidity", "light", "humidityratio"]

# Every column of the validation frame except the label.
all_features = list(filter(lambda col: col != target, val_df.columns))

# Members of all_features whose column dtype is bool.
bool_features = [name for name in all_features if val_df[name].dtype == "bool"]

In [5]:
# Split the validation frame into the label series and the remaining columns.
y_valid = val_df[target]
X_valid = val_df.drop(columns=[target])

### Load model

In [6]:
# Load the model saved in MLflow's scikit-learn flavour from models/tree_limited.
# load_model receives the location as a plain string path.
model = mlflow.sklearn.load_model(str(model_path / "tree_limited"))

### Validate

In [7]:
# Predict from the org_features columns only, in the order they are listed
# (presumably the feature set the model was trained on -- TODO confirm).
y_pred = model.predict(X_valid.loc[:, org_features])

In [8]:
# Compare predictions with the true labels. print() is used because the report
# is a multi-line string that would otherwise be shown as an escaped repr.
print(classification_report(y_true=y_valid, y_pred=y_pred))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      7703
           1       0.97      0.99      0.98      2049

    accuracy                           0.99      9752
   macro avg       0.99      0.99      0.99      9752
weighted avg       0.99      0.99      0.99      9752

