In [1]:
from utils import hvac_data_gen as hvdg
from datetime import datetime, timedelta
import plotly.express as px
import stumpy
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Generate HVAC Data: Amplitude Anomaly

In [2]:

generator = hvdg.HVACDataGenerator(seed=10)

In [70]:

anomaly_config = [
        {
            'unit': 1,
            'type': 'amplitude',
            'start_day': 3,
            'start_hour': 8,
            'duration_hours': 48,
            'params': {'scale_factor': 0.5}
        },
    ]
    
phase_offset_df = generator.generate_container_data(
    container_id=1,
    start_time=datetime(2026, 1, 15),
    duration_days=7,
    anomaly_config=anomaly_config
)
phase_offset_df['anom_type'] = 'phase_offset'


# Visualize TempRet
fig = px.line(phase_offset_df, x='timestamp_et', y='TmpRet', color='unit') # mode='line')
fig.add_trace(
    go.Scatter(
        x=phase_offset_df['timestamp_et'],
        y=phase_offset_df['anomaly'].astype(int),
        mode='markers',
        name='Anomaly',
        yaxis='y2',
        marker=dict(color='red', size=6)
    )
)

# Configure secondary y-axis
fig.update_layout(
    yaxis2=dict(
        title='Anomaly',
        overlaying='y',
        side='right',
        range=[-0.1, 1.1]  # gives some padding for boolean 0/1
    )
)

fig.show()

In [10]:
phase_offset_df.head()

Unnamed: 0,timestamp_et,unit,TmpRet,anomaly,anomaly_type,container_id,anom_type
0,2026-01-15 00:00:00,0,51.580346,False,normal,1,phase_offset
1,2026-01-15 00:00:00,1,51.330456,False,normal,1,phase_offset
2,2026-01-15 00:00:00,2,50.912649,False,normal,1,phase_offset
3,2026-01-15 00:01:00,0,51.625019,False,normal,1,phase_offset
4,2026-01-15 00:01:00,1,51.601706,False,normal,1,phase_offset


In [5]:
phase_offset_df[phase_offset_df['anomaly_type'] != 'normal']

Unnamed: 0,timestamp_et,unit,TmpRet,anomaly,anomaly_type,container_id,anom_type
14400,2026-01-18 08:00:00,0,55.148421,True,amplitude,1,phase_offset
14401,2026-01-18 08:00:00,1,54.518009,True,amplitude,1,phase_offset
14402,2026-01-18 08:00:00,2,55.005532,True,amplitude,1,phase_offset
14403,2026-01-18 08:01:00,0,55.092376,True,amplitude,1,phase_offset
14404,2026-01-18 08:01:00,1,54.539494,True,amplitude,1,phase_offset
...,...,...,...,...,...,...,...
23035,2026-01-20 07:58:00,1,51.897392,True,amplitude,1,phase_offset
23036,2026-01-20 07:58:00,2,52.800710,True,amplitude,1,phase_offset
23037,2026-01-20 07:59:00,0,52.579401,True,amplitude,1,phase_offset
23038,2026-01-20 07:59:00,1,51.940463,True,amplitude,1,phase_offset


# Isolation Forest
- Doesn't work because of high density of anomalies

In [None]:

from TSB_AD.model_wrapper import run_Unsupervise_AD

In [11]:
feat_cols = ['TmpRet', 'unit']
from sklearn.preprocessing import StandardScaler

# Pivot the dataframe
pivoted_df = phase_offset_df.pivot_table(
    index='timestamp_et',
    columns='unit',
    values='TmpRet',
)

# Scale the data (maintains DataFrame structure)
scaler = StandardScaler()
scaled_df = pd.DataFrame(
    scaler.fit_transform(pivoted_df.fillna(pivoted_df.mean())),
    index=pivoted_df.index,
    columns=pivoted_df.columns
)
scaled_df.head()
window_size = 5  # adjust window size as needed
smoothed_df = scaled_df.rolling(window=window_size, center=True).mean()

# Remove NaN values from the beginning/end
smoothed_df = smoothed_df.dropna()

## Why IForest doesn't work?
1. anomalous lie lose to normal points in values
1. Because isolation forest has no concept of temporal relationships
2. Maybe some features can be designed for this, but if you don't know what you are looking for then its hard to design right features.
   1. Autoencoders can work here to flag unknown unknowns. 

In [25]:
px.scatter(
    x=phase_offset_df.loc[phase_offset_df['unit'] == 0, 'TmpRet'], 
    y=phase_offset_df.loc[phase_offset_df['unit'] == 1, 'TmpRet'], 
    color = phase_offset_df.loc[phase_offset_df['unit'] == 0, 'anomaly']
           )

In [None]:

# Feed to iforest
iforest_score = run_Unsupervise_AD('IForest', scaled_df, contamination=0.5)

In [16]:
px.line(iforest_score)

# Matrix Profile

TODO: read about FLOSS for detecting regime changes https://stumpy.readthedocs.io/en/latest/Tutorial_Semantic_Segmentation.html


In [69]:
import stumpy

# Compute matrix profile
window_size = 60*24  # adjust to your pattern length
mp = stumpy.stump(scaled_df.iloc[:, 0].values, m=window_size)
matrix_profile = mp[:, 0]

# High MP values = unusual patterns
threshold = np.percentile(matrix_profile, 90)
anomalies = matrix_profile > threshold

px.line(matrix_profile)

In [73]:
scaled_df.values

array([[-0.37260718, -0.42366675, -0.38295066],
       [-0.42833858, -0.40849069, -0.41485078],
       [-0.40445957, -0.44046579, -0.39160914],
       ...,
       [-1.19470286, -1.19360589, -1.19309488],
       [-1.17369659, -1.27767433, -1.18976887],
       [-1.17951867, -1.18365867, -1.20350349]])

In [74]:
import stumpy

# Compute matrix profile on all units combined
matrix_profile = stumpy.stump(scaled_df.iloc[:, 0].values, m=window_size)[:, 0]

# Look for sudden jumps in matrix profile (regime change indicator)
mp_change = np.abs(np.diff(matrix_profile))
regime_change_threshold = np.percentile(mp_change, 95)

regime_changes = np.where(mp_change > regime_change_threshold)[0]

# Plot to visualize
fig = go.Figure()
fig.add_trace(go.Scatter(y=matrix_profile, name='Matrix Profile', mode='lines'))
for rc in regime_changes:
    fig.add_vline(x=scaled_df.index[rc], line_dash='dash', line_color='red')
fig.show()



In [75]:
from sklearn.neighbors import LocalOutlierFactor

lof = LocalOutlierFactor(n_neighbors=20, contamination=0.1)
anomalies = lof.fit_predict(scaled_df.values)
anomalies_bool = anomalies == -1

anomaly_series = pd.Series(anomalies_bool, index=scaled_df.index)


In [76]:
px.line(anomaly_series)