# Prepare environment

In [None]:
# NOTE(review): a `!` line is executed in a separate shell process, so this
# activation does not change the interpreter or packages of the running kernel.
# Select the .venv kernel (or start Jupyter from the activated venv) instead.
!source .venv/bin/activate

# Imports

In [None]:
import influxdb_client, os, time
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS
import numpy as np
import pandas as pd
import mlflow
from mlflow.models import infer_signature, set_signature
import mlflow.keras
import yaml
from pickle import dump
import math
import os
import shutil
import matplotlib.pyplot as plt 
from datetime import datetime, timedelta

# Load the notebook configuration (InfluxDB connection, MLflow URL, entity list,
# feature and target names) from the YAML file in the working directory.
with open('thermostat.yaml', 'r', encoding='utf-8') as file:
    variables = yaml.safe_load(file)

# Only show the top-level section names: the configuration contains the InfluxDB
# token, which must not end up in the saved cell output.
print(f"Loaded configuration sections: {list(variables)}")

# Load data

In [None]:
# Connect to InfluxDB with the settings from the YAML configuration.
# NOTE(review): verify_ssl=False turns off TLS certificate verification; enable
# it if the server presents a certificate the client can validate.
client = influxdb_client.InfluxDBClient(
    url=variables["influx_db"]["url"],
    token=variables["influx_db"]["token"],
    org=variables["influx_db"]["org"],
    verify_ssl=False,
    timeout=3600000
)

query_api = client.query_api()

# The query starts 5 days before start_time; start_time itself is used later in
# the notebook to cut the prepared rows.
start_time = "2022-01-01T00:00:00Z"
query_start = datetime.strptime(start_time, "%Y-%m-%dT%H:%M:%SZ") - timedelta(days=5)

# Flux template: "start: " and "entity_name" are placeholders substituted below.
base_query = """
        from(bucket: "homeassistant")
            |> range(start: )
            |> filter(fn: (r) => r["entity_id"] == "entity_name")
            |> filter(fn: (r) => r["_field"] == "value")
            |> fill(usePrevious: true)
            |> drop(columns: ["result", "table", "_start", "_stop", "_field", "source","domain","_measurement", "friendly_name"])
            |> pivot(rowKey: ["_time"], columnKey: ["entity_id"], valueColumn: "_value")
            |> yield(name: "last")"""
base_query = base_query.replace("start: ", f'start: {datetime.strftime(query_start, "%Y-%m-%dT%H:%M:%SZ")}')

# Each configured entry has the form "entity_id[:field[:extra]]".
full_df = None          # combined frame, created from the first entity that loads
missing_defaults = {}   # placeholder columns recorded before full_df exists
for nb, entity in enumerate(variables["data"]):
    print(entity)
    entity = entity.split(":")
    # Compare the strings themselves; the previous `{entity[1]} != ""` compared a
    # set literal with a string and was therefore always True.
    has_field = len(entity) > 1 and entity[1] != ""
    has_extra = len(entity) > 2 and entity[2] != ""
    # Column the pivot produces: the field name when one is given, else the entity id.
    column_name = entity[1] if has_field else entity[0]

    query = base_query.replace("entity_name", entity[0], 1)
    if has_field:
        # Select the requested field and pivot on it instead of on entity_id.
        query = query.replace('r["_field"] == "value"', f'r["_field"] == "{entity[1]}"')
        query = query.replace('columnKey: ["entity_id"]', 'columnKey: ["_field"]')
        query = query.replace(
            'drop(columns: ["result", "table", "_start", "_stop", "_field", "source","domain","_measurement", "friendly_name"])',
            'drop(columns: ["result", "table", "_start", "_stop", "entity_id", "source","domain","_measurement", "friendly_name"])'
        )
    if has_extra:
        # NOTE(review): base_query contains no 'fn: last,' text, so this
        # replacement currently has no effect on the query.
        query = query.replace('fn: last,', f'fn: {entity[2]},')
    # print(query)
    df = query_api.query_data_frame(query, org=variables["influx_db"]["org"])
    print(df.head())
    try:
        df.set_index('_time', inplace=True)
        df.drop(["result", "table"], axis=1, inplace=True)
        if full_df is None:
            full_df = df.copy()
            # Apply placeholders of entities that failed before any data was loaded.
            for pending_name, pending_value in missing_defaults.items():
                full_df[pending_name] = pending_value
            missing_defaults.clear()
        else:
            full_df = full_df.join(df,on="_time", how='outer')
    except KeyError:
        # The result lacks the expected columns (e.g. nothing was returned):
        # add a placeholder column so later cells still find it.
        print(f"{column_name} was not found")
        fill_value = entity[2] if has_extra else np.nan
        if full_df is None:
            missing_defaults[column_name] = fill_value
        else:
            full_df[column_name] = fill_value

if full_df is None:
    raise RuntimeError("None of the configured entities returned any data")

full_df.head()

# Prepare data

In [None]:
print(full_df.head(10))
feature_names = variables["features"]

# Bounds/defaults applied to the two setpoint targets below.
TEMP_LOW_DEFAULT = 14
TEMP_HIGH_DEFAULT = 34

# Interpolate the numeric features, then forward-fill whatever is still missing.
for feature in variables["numeric_features"]:
    full_df[feature] = full_df[feature].interpolate()

full_df = full_df.ffill()
full_df["home_status"] = full_df["in_bed"] + full_df["presence"]

print(full_df.shape)
# Keep only rows where ha_started == 1; .copy() makes the .loc assignments below
# write to an independent frame instead of a slice (no SettingWithCopyWarning).
full_df = full_df[full_df['ha_started']==1].copy()
mask_heat = full_df['state']=='heat'
mask_cool = full_df['state']=='cool'
mask_off = full_df['state']=='off'
# In heat/cool state the single 'temperature' value is used as the low/high target;
# in off state both targets fall back to their defaults.
full_df.loc[mask_heat, 'target_temp_low'] = full_df.loc[mask_heat, 'temperature']
full_df.loc[mask_cool, 'target_temp_high'] = full_df.loc[mask_cool, 'temperature']
full_df.loc[mask_off, 'target_temp_low'] = TEMP_LOW_DEFAULT
full_df.loc[mask_off, 'target_temp_high'] = TEMP_HIGH_DEFAULT
# Pull out-of-range targets back into range.
full_df.loc[pd.to_numeric(full_df['target_temp_low']) < TEMP_LOW_DEFAULT, 'target_temp_low'] = TEMP_LOW_DEFAULT
full_df.loc[pd.to_numeric(full_df['target_temp_low']) > 21, 'target_temp_low'] = 20.5
full_df.loc[pd.to_numeric(full_df['target_temp_high']) > TEMP_HIGH_DEFAULT, 'target_temp_high'] = TEMP_HIGH_DEFAULT
full_df.loc[pd.to_numeric(full_df['target_temp_high']) < 20, 'target_temp_high'] = TEMP_HIGH_DEFAULT
# fillna returns a new Series: assign it back, otherwise the defaults are never
# applied and the affected rows are removed by dropna() further down.
full_df['target_temp_low'] = full_df['target_temp_low'].fillna(TEMP_LOW_DEFAULT)
full_df['target_temp_high'] = full_df['target_temp_high'].fillna(TEMP_HIGH_DEFAULT)
# Turn the '_time' index into a column; the guard keeps a re-run of this cell
# from adding a stray 'index' column.
if '_time' not in full_df.columns:
    full_df = full_df.reset_index()
new_df = full_df[['_time'] + variables["features"] + variables["targets"]].copy()
# Keeps the previous row position as an 'index' column.
new_df.reset_index(inplace=True)
# Drop incomplete rows, then rows whose feature values repeat an earlier row.
new_df = new_df.dropna().drop_duplicates(subset=feature_names, ignore_index=True)
print(new_df.head(10))
new_df = new_df[new_df['_time']> start_time].copy()
print(new_df.shape)

target = new_df[variables["targets"]].copy()
numeric_features = new_df[feature_names].copy()
numeric_features.head(20)


# Data inspection and MLflow setup

In [None]:
# Text preview of the training frame and of the full prepared frame.
print(new_df.head(10))
print(full_df.head(10))

#target.loc[target["target_temp_low"]>25, "target_temp_low"] = 20.5

# Three stacked panels: the two targets over the training rows, and the
# 'temperature' column over all prepared rows.
fig, (ax_high, ax_low, ax_temp) = plt.subplots(3, 1)

ax_high.plot(new_df['_time'], target["target_temp_high"])
ax_high.set_title("target_temp_high")
ax_high.set_ylabel("target_temp_high")

ax_low.plot(new_df['_time'], target["target_temp_low"])
ax_low.set_title("target_temp_low")
ax_low.set_ylabel("target_temp_low")

ax_temp.plot(full_df['_time'], full_df["temperature"])
ax_temp.set_title("temperature")
ax_temp.set_ylabel("temperature")
ax_temp.set_xlabel("_time")

fig.tight_layout()
plt.show()

In [None]:
# NOTE(review): presumably tells the MLflow client to skip TLS certificate
# verification when talking to the tracking server — confirm against the MLflow docs.
os.environ["MLFLOW_TRACKING_INSECURE_TLS"] = "true"
# Point MLflow at the tracking server from the YAML config and select the experiment.
mlflow.set_tracking_uri(uri=variables["mlflow"]["url"])
mlflow.set_experiment("Thermostat setpoint")


# Enabled before the model is built and fitted.
# NOTE(review): the run read back with mlflow.last_active_run() after fit() is
# presumably the one autolog creates — confirm.
mlflow.autolog()

# NOTE(review): in the visible notebook, `tf` and the Conv*/MaxPooling* layers are
# referenced only in commented-out lines.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Flatten, Conv1D, MaxPooling1D, Normalization, Dropout, Reshape, Conv2D, MaxPooling2D
from tensorflow.keras.metrics import R2Score


# Data transformation

# Model, pipeline setup and fit

In [None]:
# Every sample is a flat vector with one entry per feature column.
input_shape = (numeric_features.values.shape[1],)

# A single mean/variance pooled over the whole feature matrix; they are handed
# to the Normalization layer, which is configured with axis=None.
feature_matrix = numeric_features.values
mean = np.mean(feature_matrix)
variance = np.var(feature_matrix)
print(f"mean: {mean}")
print(f"variance: {variance}")

# Front of the network. Reshape((3,6)) regroups the flat vector into 3 rows of 6
# values, so the feature count has to be 18 for the model to build.
layer_stack = [
    Normalization( input_shape=input_shape, mean=mean, variance=variance, axis=None),
    Reshape((3,6)),
    LSTM(256, return_sequences=True),
    Flatten(),
]

# Fully connected head: each hidden Dense layer is followed by 50% dropout.
for units in (512, 512, 256, 128):
    layer_stack.append(Dense(units, kernel_initializer='normal', activation='relu'))
    layer_stack.append(Dropout(0.5))

# Two linear outputs, one per target column.
layer_stack.append(Dense(2, kernel_initializer='normal'))

model = Sequential(layer_stack)

# MAE is the training loss; R2, MSE, MAE and MAPE are tracked as metrics.
model.compile(optimizer='adam', loss='mean_absolute_error', metrics=[R2Score(), 'mse', 'mae', 'mape'])

# Print the layer-by-layer overview.
model.summary()


In [None]:
# Training hyper-parameters.
batch_size = 25
epochs = 100

# batch_size is now passed explicitly; it was declared but never handed to
# fit(), so Keras silently used its own default batch size.
# validation_split=0.2 holds out 20% of the samples for validation.
history = model.fit(
    numeric_features.values,
    target.values,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
)

# Keep the run metadata: the run id is needed to attach the model signature.
run_dict =  mlflow.last_active_run().to_dictionary()
print(run_dict)

mlflow.end_run()

In [None]:
# Infer the model signature from the feature frame and the model's predictions
# on it, then attach it to the model logged under the training run.
predictions = model.predict(numeric_features)
signature = infer_signature(numeric_features, predictions)

logged_model_uri = f"runs:/{run_dict['info']['run_id']}/model"
set_signature(logged_model_uri, signature)

print(numeric_features.head())

In [None]:
def plot_history_metric(history_dict, metric, title):
    """Plot the per-epoch training curve of one metric, plus its validation curve.

    Args:
        history_dict: mapping of metric name to a list of per-epoch values
            (the ``history`` attribute of the object returned by ``model.fit``).
        metric: key of the training metric, e.g. ``'mse'`` or ``'loss'``.
        title: title shown above the plot.

    The validation curve is read from ``'val_<metric>'`` and is skipped when
    that key is absent (e.g. training without a validation split).
    """
    fig, ax = plt.subplots()
    ax.plot(history_dict[metric], label='train')
    validation_key = f'val_{metric}'
    if validation_key in history_dict:
        ax.plot(history_dict[validation_key], label='validation')
    ax.set_title(title)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(loc='upper left')
    plt.show()


# list all data in history
print(history.history.keys())
# MSE curve (previously mis-titled "model accuracy")
plot_history_metric(history.history, 'mse', 'model mse')
# training loss curve
plot_history_metric(history.history, 'loss', 'model loss')

In [None]:

# Disabled experiment: an AutoKeras StructuredDataRegressor search that would
# replace `model` with the best model found (see `search.export_model()` below).
# While it stays commented out, the only statement that runs in this cell is the
# final model.summary(), which prints the model built earlier in the notebook.
#
# with mlflow.start_run() as run:
#     mlflow.autolog()
#     import autokeras as ak
#     os.environ["MLFLOW_TRACKING_INSECURE_TLS"] = "true"
#     mlflow.set_tracking_uri(uri=variables["mlflow"]["url"])
#     mlflow.set_experiment("Thermostat setpoint")
#     signature = infer_signature(numeric_features, model.predict(numeric_features))
#     # define the search
#     search = ak.StructuredDataRegressor(max_trials=100, loss='mean_absolute_error',  metrics=['mse', 'mae', 'mape'])
#     # perform the search
#     history = search.fit(x=numeric_features.values, y=target.values, validation_split=0.2,)

#     mae, mse, mape, _ = search.evaluate(numeric_features.values, target.values, verbose=0)
#     print('MAE: %.3f' % mae)

#     # get the best performing model
#     model = search.export_model()
#     # summarize the loaded model
model.summary()

In [None]:
# run_dict =  mlflow.last_active_run().to_dictionary() 
# print(run_dict)
# signature = infer_signature(numeric_features, model.predict(numeric_features))
# set_signature(f"runs:/{run_dict['info']['run_id']}/model", signature)

# # list all data in history
# print(history.history.keys())
# # summarize history for accuracy
# plt.plot(history.history['mse'])
# # plt.plot(history.history['val_mse'])
# plt.title('model accuracy')
# plt.ylabel('mse')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'], loc='upper left')
# plt.show()
# # summarize history for loss
# plt.plot(history.history['loss'])
# # plt.plot(history.history['val_loss'])
# plt.title('model loss')
# plt.ylabel('loss')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'], loc='upper left')
# plt.show()

In [None]:

# new_df.to_csv('thermostat_2022-23.csv', encoding='utf-8')
#!export MLFLOW_TRACKING_URI=variables["mlflow"]["url"]
#!mlflow models build-docker -m runs:/57e811d9acd34c0db02d762d6fa6c31b/model -n paillomams/presence-det --enable-mlserver

#!docker push paillomams/presence-det