In [1]:
!pip install scikit-learn==1.3.* numpy==1.25.*

Looking in indexes: https://pypi.org/simple, https://www.piwheels.org/simple
Collecting scikit-learn==1.3.*
  Downloading scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl.metadata (11 kB)
Downloading scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (10.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.3/10.3 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.5.2
    Uninstalling scikit-learn-1.5.2:
      Successfully uninstalled scikit-learn-1.5.2
Successfully installed scikit-learn-1.3.2
[0m

In [2]:
import pandas as pd
from pathlib import Path

In [3]:
# Search the working directory recursively for Home Assistant exports and
# load the one whose path sorts last.
export_paths = sorted(Path(".").glob("**/homeassistant_export.*"))
fn = export_paths[-1]

# The first CSV column holds the row labels.
df = pd.read_csv(fn, index_col=0)
df

Unnamed: 0,last_updated_datetime,sensor.forecast_temperature,sensor.house_consumption_daily,sensor.teplomer_venku_temperature
0,2024-10-20 12:00:00,15.300000,30.1,24.631915
1,2024-10-20 13:00:00,15.960673,33.0,24.655556
2,2024-10-20 14:00:00,16.107738,34.6,24.517610
3,2024-10-20 15:00:00,15.755480,36.9,24.447572
4,2024-10-20 16:00:00,14.687518,39.0,24.607072
...,...,...,...,...
831,2024-11-24 15:00:00,11.053594,42.3,12.399501
832,2024-11-24 16:00:00,10.818492,45.6,11.376284
833,2024-11-24 17:00:00,10.984840,49.8,10.887013
834,2024-11-24 18:00:00,10.905913,53.1,10.264910


In [4]:
# Target engineering: the largest "sensor.house_consumption_daily" value within a day is
# taken as that day's total consumption; the target is how much of that total is still
# to come at each reading.

# Parse the 'last_updated_datetime' column so the .dt accessor can be used on it
df['last_updated_datetime'] = pd.to_datetime(df['last_updated_datetime'])

# Calendar day of every reading, used as the grouping key
df['date'] = df['last_updated_datetime'].dt.date

# Per-day maximum, broadcast back onto every row of that day (kept as a Series,
# so no temporary column has to be added to and removed from the frame)
daily_total = df.groupby('date')['sensor.house_consumption_daily'].transform('max')

# Remaining consumption = day's total minus the reading so far
df['remaining_house_consumption'] = daily_total - df['sensor.house_consumption_daily']




In [5]:
# Inspect the frame after the 'date' and 'remaining_house_consumption' columns were added
df

Unnamed: 0,last_updated_datetime,sensor.forecast_temperature,sensor.house_consumption_daily,sensor.teplomer_venku_temperature,date,remaining_house_consumption
0,2024-10-20 12:00:00,15.300000,30.1,24.631915,2024-10-20,18.2
1,2024-10-20 13:00:00,15.960673,33.0,24.655556,2024-10-20,15.3
2,2024-10-20 14:00:00,16.107738,34.6,24.517610,2024-10-20,13.7
3,2024-10-20 15:00:00,15.755480,36.9,24.447572,2024-10-20,11.4
4,2024-10-20 16:00:00,14.687518,39.0,24.607072,2024-10-20,9.3
...,...,...,...,...,...,...
831,2024-11-24 15:00:00,11.053594,42.3,12.399501,2024-11-24,13.0
832,2024-11-24 16:00:00,10.818492,45.6,11.376284,2024-11-24,9.7
833,2024-11-24 17:00:00,10.984840,49.8,10.887013,2024-11-24,5.5
834,2024-11-24 18:00:00,10.905913,53.1,10.264910,2024-11-24,2.2


In [6]:
# Calendar features for the model, both derived from the parsed timestamp column
timestamps = df['last_updated_datetime'].dt
df['day_of_year'] = timestamps.dayofyear  # ordinal day within the year
df['hour'] = timestamps.hour              # hour of the day

In [7]:
# Prepare the data for a regression model that predicts today's remaining house consumption.
from sklearn.model_selection import train_test_split

# The raw counter is no longer needed once the target exists; errors="ignore" keeps the
# cell re-runnable after the column has already been removed.
df.drop(columns=["sensor.house_consumption_daily"], errors="ignore", inplace=True)

# Inputs: position in the year, hour of the day and the forecast temperature
feature_columns = ['day_of_year', 'hour', "sensor.forecast_temperature"]
X = df[feature_columns]

# Output: the engineered target
y = df['remaining_house_consumption']

# Hold out 20 % of the rows for evaluation; the seed keeps the split stable between runs.
# NOTE(review): the rows are hourly readings and this split is shuffled, so hours of the
# same day can end up in both train and test -- the scores below may be optimistic.
# Consider a chronological split; confirm what the evaluation should measure.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)





In [8]:
from sklearn.linear_model import LinearRegression

# Baseline: plain linear regression; fit() returns the estimator, so build and train in one step
model = LinearRegression().fit(X_train, y_train)

# Score on the held-out rows
model.score(X_test, y_test)


0.5911343399275768

In [9]:
# Try a neural network next (the SVR imported here is fitted in the following cell)
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor

# Two hidden layers with 30 and 70 units.
# random_state pins the weight initialisation and batch shuffling, so the score below and
# the model exported at the end of the notebook are reproducible from a fresh kernel;
# the seed matches the one used for the train/test split.
nn = MLPRegressor(
    hidden_layer_sizes=(30, 70),
    learning_rate_init=0.001,
    max_iter=400,
    random_state=42,
)

# Train on the training rows and score on the held-out rows
nn.fit(X_train, y_train)
nn.score(X_test, y_test)

0.7266465148798934

In [10]:
# Support-vector regression with scikit-learn's default settings; fit() returns the estimator
svr = SVR().fit(X_train, y_train)

# Score on the held-out rows
svr.score(X_test, y_test)

0.05724026117039849

In [11]:
# i want to try the random forest
from sklearn.ensemble import RandomForestRegressor

# Initialize the model
rf = RandomForestRegressor()
rf.fit(X_train, y_train)

rf.score(X_test, y_test)

0.9733030871465415

In [12]:
# Peek at the held-out feature rows (row labels are the labels of the original frame)
X_test

Unnamed: 0,day_of_year,hour,sensor.forecast_temperature
611,320,11,6.081518
823,329,7,7.084602
290,306,20,9.076262
801,328,9,-0.390956
168,301,12,16.802790
...,...,...,...
653,322,5,1.229704
456,314,0,2.795630
778,327,10,-2.177654
532,317,4,1.371019


In [13]:
# Spot check: day 350, 01:00, forecast temperature -10.
# The forest was fitted on a DataFrame, so pass one with the same column labels; a bare
# nested list makes scikit-learn warn about missing feature names and gives no protection
# against a wrong column order.
# NOTE(review): day 350 appears to lie outside the date range shown in the export above
# (2024-10-20 to 2024-11-24), so this is an extrapolation -- confirm before trusting it.
rf.predict(pd.DataFrame([[350, 1, -10]], columns=rf.feature_names_in_))



array([72.01])

In [14]:
# Spot check: day 250, 01:00, forecast temperature 15.
# The forest was fitted on a DataFrame, so pass one with the same column labels; a bare
# nested list makes scikit-learn warn about missing feature names and gives no protection
# against a wrong column order.
# NOTE(review): day 250 appears to lie outside the date range shown in the export above
# (2024-10-20 to 2024-11-24), so this is an extrapolation -- confirm before trusting it.
rf.predict(pd.DataFrame([[250, 1, 15]], columns=rf.feature_names_in_))




array([36.21])

In [15]:
import numpy as np
# Show the NumPy version the kernel actually imported.
# NOTE(review): the recorded output reports 1.26.x while the install cell pins
# numpy==1.25.* -- confirm that the kernel uses the environment the install targeted.
np.__version__



'1.26.4'

In [16]:
import pickle


In [17]:
import joblib

# Persist the fitted neural network to the working directory; joblib.dump returns the
# list of file names it wrote, which becomes the cell output.
# NOTE(review): `nn` is the estimator saved here although `rf` scored higher on the
# held-out rows in the cells above -- confirm this is the intended model.
model_path = "remaining_consumption_model.pkl"
joblib.dump(nn, model_path)

['remaining_consumption_model.pkl']

# Copy the saved model file to the AppDaemon apps directory (run in a terminal)

```bash
cp /config/notebooks/home-assistant/remaining_consumption_model.pkl /addon_configs/a0d7b954_appdaemon/apps/
```

In [18]:
# cp /config/notebooks/home-assistant/remaining_consumption_model.pkl /addon_configs/a0d7b954_appdaemon/apps/