In [17]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
import random
from scipy.optimize import minimize
import plotly.express as px



This loads the data for one device:

In [18]:
df = None
directory = "/Users/timruppert/Downloads/ev3_device_data_sample"

for filename in os.listdir(directory):
    file_path = os.path.join(directory, filename)
    if os.path.isfile(file_path) and file_path.endswith(".csv"):  # Ensure it's a file
        df_ = pd.read_csv(file_path).sort_values(by="year_month_day").reset_index(drop=True)
        if df is None:
            df = df_
            
        else:
            df = pd.concat([df, df_])
    



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



The calculation of the DeltaT Benschmark is done by $\displaystyle\frac{\Delta T}{\Delta T_\text{Setpoint}}$.

Also, we calculate the total energy used (in kWh) by adding the Heating Energy to the Cooling Energy.

In [19]:
df = df.sort_values(by="year_month_day")
df = df.reset_index(drop=True)

df['BENCHMARK_DELTA_T'] = (df['DeltaT_K']) / df['SpDeltaT_applied_K']
df["total_energy_kWh"] = (df["Heating_E_J"] + df["Cooling_E_J"]) / 3600000

## Mock data Generation for indoor & outdoor temperature

The given dataset didn't include indoor & outdoor temperature, but we will need it for further benchmarking. As these are timeseries that are available in real world use, we will generate randomized mock data. (Which still follows correct correlation patterns):

In [20]:
prev_T_out = 273.15 + 20 + random.randint(-20, 20)/10
prev_T_in = 273.15 + 21 + random.randint(-5, 5)/10
prev_entry = df.index[0]
for entry in df.index[1:]:
    
    format_string = '%Y-%m-%dT%H:%M:%S.%fZ'
    dt = (datetime.strptime(df.loc[entry, "sample_time"], format_string) - datetime.strptime(df.loc[prev_entry, "sample_time"], format_string)).total_seconds()

    T_out = prev_T_out + random.randint(-20, 20)/10
    T_in = prev_T_in + 0.5*(T_out-prev_T_out)
    df.loc[entry, "T_in"] = float(T_in)
    df.loc[entry, "T_out"] = float(T_out)

    T_in_prime = prev_T_in + 0.5*(T_out-prev_T_out)

    df.loc[entry, "T_in_prime"] = float(T_in_prime)

    prev_T_out = T_out
    prev_T_in = T_in
    prev_entry = entry


## Temperature Equilibration Model and Insulation Score calculation

We have the following discrete dynamical System to model the temperature equilibration:

$\displaystyle T_{in}(t)-T_{in}(t-1) = \alpha\cdot\big(T_{set}(t)-T_{in}(t)\big) + \beta\cdot\big(T_{out}(t)- T_{in}(t)\big)$

using a scipy optimizer we can find the parameters $\alpha$ and $\beta$, which are the coupling constants between the indoor temperature and the Set temperature and the indoor temperature and the outdoor temperature. 

So we can interpret $\beta$ beta as the wellness of the insulation.

In [21]:
def identify_parameters(f_prime, f, g, h):
    """
    Identifies the parameters a and b for the equation:
    f'(t) = a*(g(t)-f(t)) + b*(h(t)-f(t))
    
    Args:
        f_prime (np.array): First derivative of f(t)
        f (np.array): Time series f(t)
        g (np.array): Time series g(t)
        h (np.array): Time series h(t)
    
    Returns:
        tuple: (a, b) - the identified parameters
    """

    # Calculate differences for the model
    dg = g - f  # g(t)-f(t)
    dh = h - f  # h(t)-f(t)
    
    # Define objective function for optimization
    def objective(params):
        a, b = params
        predicted = a * dg + b * dh
        return np.sum((predicted - f_prime) ** 2)  # Sum of squared errors
    
    # Start optimization
    result = minimize(objective, x0=[0.1, 0.1], method='Nelder-Mead')
    
    return result.x[1]

Genrate the day samples:

In [22]:

df["ID"] = df.index
lst = df.groupby('year_month_day').agg({'BENCHMARK_DELTA_T': 'mean', 'total_energy_kWh': lambda x: x.max() - x.min(), "ID": ["min", "max"]}).reset_index()

In [23]:
n = 30

for day in lst.index[n:]:
    min_idx = lst.loc[day-n, ("ID", "min")]
    max_idx = lst.loc[day, ("ID", "max")]
    lst.loc[day, "BENCHMARK_INSULATION"] = identify_parameters(-df.loc[min_idx:max_idx, "T_in_prime"], df.loc[min_idx:max_idx, "T_in"], df.loc[min_idx:max_idx, "T_out"], [273.15 + 21 for _ in range(len(df.loc[min_idx:max_idx, "T_in"]))])

lst = lst.sort_values(by="year_month_day")



Lastly, we compute the Energy Performance Index using an set floor area of the building.

In [24]:
A = 1000 # m2

In [25]:
c = ((365*24 )/ A)


In [26]:
a = lst["total_energy_kWh"]*c

a = a.to_numpy().T[0]
a

array([1.99489329e+01, 3.21780607e+02, 2.28165407e+02, 8.46807579e-04,
       3.95878168e-02, 8.06914767e+01, 3.07221867e+01, 4.89282551e+01,
       2.48179547e+01, 2.74652684e+01, 2.90618984e-02, 2.31894850e-02,
       5.05636330e+00, 4.04512876e+01, 1.11281639e+02, 5.52975777e+01,
       2.79631298e+01, 1.62031807e-02, 2.27239290e-02, 8.80454986e-02,
       1.65235755e+01, 3.43065134e+01, 2.53437031e+01, 1.31560416e+01,
       1.04042120e-02, 1.72437250e-02, 8.75505787e+00, 2.16454328e-02,
       4.25779581e+01, 2.79108695e+01, 7.23214153e+01, 1.49074316e-02,
       1.43453415e-02, 6.67859737e+01, 1.70028476e+01, 5.79982712e+01,
       6.59380465e+01, 2.40830499e+01, 6.21347682e-03, 5.83601715e-03,
       1.98468080e-02, 6.24859214e+01, 6.87794720e+01, 7.30383495e+01,
       6.78814530e+01, 1.11044298e-02, 1.14212979e-02, 2.37470413e+01,
       1.16920239e-02, 1.35222253e-02, 1.53623164e-02, 1.51429127e-02,
       1.40487053e-02, 1.63215490e-02, 3.33971963e+01, 9.72302099e-01,
      

In [27]:


data = pd.DataFrame(data={
    "year_month_day":lst["year_month_day"],
    "BENCHMARK_DELTA_T":lst[("BENCHMARK_DELTA_T", "mean")],
    "BENCHMARK_EPI":a,
    "BENCHMARK_INSULATION":lst[("BENCHMARK_INSULATION")]
})
data


Unnamed: 0,year_month_day,BENCHMARK_DELTA_T,BENCHMARK_EPI,BENCHMARK_INSULATION
0,20200401,0.683374,19.948933,
1,20200402,0.740721,321.780607,
2,20200403,0.530163,228.165407,
3,20200404,0.104082,0.000847,
4,20200405,0.124728,0.039588,
...,...,...,...,...
635,20211227,0.122020,0.000123,-2940.499983
636,20211228,0.123328,0.001224,-2940.500023
637,20211229,0.118200,0.000252,-2940.499979
638,20211230,0.121759,0.011311,-2940.500032


In [28]:
px.line(data, y="BENCHMARK_DELTA_T", title="BENCHMARK_DELTA_T")

In [29]:
px.line(data, y="BENCHMARK_EPI", title="BENCHMARK_EPI")

In [30]:
px.line(data, y="BENCHMARK_EPI", title="BENCHMARK_EPI")

In [31]:
px.line(data, y="BENCHMARK_INSULATION", title="BENCHMARK_INSULATION")

In [32]:
data.to_csv("data.csv", index=False)