# Imports

In [7]:
import hvac_data_gen as hvdg
from datetime import datetime, timedelta
import plotly.express as px
import stumpy

# Generate HVAC data

In [2]:
generator = hvdg.HVACDataGenerator(seed=10)


In [50]:

anomaly_config = [
        {
            'unit': 1,
            'type': 'lag',
            'start_day': 2,
            'start_hour': 8,
            'duration_hours': 24*2,
            'params': {'lag_minutes': 60*3}
        },
    ]
    
single_container_df = generator.generate_container_data(
    container_id=0,
    start_time=datetime(2026, 1, 15),
    duration_days=5,
    #anomaly_config=anomaly_config
)
px.line(single_container_df, x='timestamp_et', y='TmpRet', color='HVACNum') # mode='line')

# Matrix Profile

In [51]:
single_container_df['TmpRet_smooth'] = single_container_df.groupby(['container_id', 'HVACNum'])['TmpRet'].transform(
    lambda x: x.rolling(window=10).mean()
)

In [52]:

px.line(single_container_df, x='timestamp_et', y='TmpRet_smooth', color='HVACNum') # mode='line')

- transform data s.t. all hvac units are 
- index unit_ts

In [53]:
df = single_container_df.dropna().copy(deep=True)
df.head()


Unnamed: 0,timestamp_et,HVACNum,TmpRet,anomaly,anomaly_type,container_id,TmpRet_smooth
27,2026-01-15 00:09:00,0,48.739389,False,normal,0,48.685951
28,2026-01-15 00:09:00,1,48.573434,False,normal,0,48.98242
29,2026-01-15 00:09:00,2,48.831019,False,normal,0,49.300762
30,2026-01-15 00:10:00,0,48.617363,False,normal,0,48.679713
31,2026-01-15 00:10:00,1,48.607197,False,normal,0,48.837199


In [54]:
df_pivot = df.pivot(index='timestamp_et', columns='HVACNum', values='TmpRet_smooth')

In [55]:
df_pivot.head()

HVACNum,0,1,2
timestamp_et,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2026-01-15 00:09:00,48.685951,48.98242,49.300762
2026-01-15 00:10:00,48.679713,48.837199,49.134326
2026-01-15 00:11:00,48.653783,48.70793,48.994981
2026-01-15 00:12:00,48.663095,48.698755,48.871608
2026-01-15 00:13:00,48.673652,48.700755,48.718978


In [56]:
import numpy as np
window_size = 60*2
for i in [0, 1, 2]:
    for j in range(i+1, 3):
        df_pivot[f'dist_{i}_{j}'] = np.sqrt(
            ((df_pivot[i] - df_pivot[j])**2).rolling(window_size).sum()
        )

In [57]:
df_pivot.head()

HVACNum,0,1,2,dist_0_1,dist_0_2,dist_1_2
timestamp_et,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2026-01-15 00:09:00,48.685951,48.98242,49.300762,,,
2026-01-15 00:10:00,48.679713,48.837199,49.134326,,,
2026-01-15 00:11:00,48.653783,48.70793,48.994981,,,
2026-01-15 00:12:00,48.663095,48.698755,48.871608,,,
2026-01-15 00:13:00,48.673652,48.700755,48.718978,,,


In [58]:
px.line(df, x='timestamp_et', y='TmpRet', color='HVACNum') # mode='line')

In [62]:
px.line(df_pivot, y='dist_0_2')

In [63]:
px.line(df_pivot, y='dist_1_2')

In [None]:
df_long = df_pivot.reset_index().melt(
    id_vars=['timestamp_et', 'dist_0_1', 'dist_0_2', 'dist_1_2'], 
    var_name='HVACNum', 
    value_name='TmpRet'
)


In [None]:
mp = stumpy.stump(df['TmpRet_smooth'].values, window_size, normalize=False)

In [27]:
mp.shape

(21544, 4)

In [28]:
df.shape

(21573, 7)

In [30]:
mp

mparray([[3.571778949619093, 8625, -1, 8625],
         [3.69452303849123, 1741, -1, 1741],
         [3.526109523067083, 1742, -1, 1742],
         ...,
         [0.1089220945455469, 21374, 21374, -1],
         [0.11283583099640251, 21375, 21375, -1],
         [0.11241793993518912, 21482, 21482, -1]], dtype=object)

In [33]:
mp[0, :]

mparray([3.571778949619093, 8625, -1, 8625], dtype=object)

In [34]:
px.line(mp[:, 0])

In [None]:

print(f"\nGenerated {len(single_container_df)} records")
print(f"Anomaly records: {single_container_df['anomaly'].sum()}")
print("\nFirst few records:")
print(single_container_df.head(10))
    
    # Visualize

In [None]:
c1 = single_container_df #[single_container_df["HVACNum"] == 2]
c1['HVACNum'] = c1['HVACNum'].astype('category')
# c1['TmpRet_smooth'] = c1['TmpRet'].groupby('HVACNum').rolling(window=20, center=True).mean()
c1['TmpRet_smooth'] = c1.groupby(['container_id', 'HVACNum'])['TmpRet'].transform(
    lambda x: x.rolling(window=10).mean()
)


In [None]:
single_container_df[single_container_df['anomaly']]