In [None]:
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

import warnings
# Suppress every warning for the rest of the session. This keeps cell output
# clean but also hides deprecation/runtime warnings raised by any library.
warnings.filterwarnings("ignore")

import plotly.io as pio
# Use the "notebook" renderer as the default for Plotly figures.
pio.renderers.default = "notebook"

# Source CSV for the measurements analysed below.
csv_path = "/Users/sungwoo/Downloads/matrix-nilm-test/csv/house_009/H009_ch01_20231002.csv"

df = pd.read_csv(
    csv_path,
    parse_dates=['date_time'],  # parse the date_time column as datetime
    index_col=False,            # do not promote the first column to the index
)

# Print column names, dtypes and non-null counts.
df.info()

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# Row-to-row change in active power; NaNs (e.g. the first row, which has no
# predecessor) are replaced with 0.
power_delta = df['active_power'].diff()
df['active_power_diff'] = power_delta.fillna(0)

# Feature columns: the five measured quantities followed by the derived delta.
features = ['active_power', 'current', 'voltage', 'apparent_power', 'reactive_power']
features.append('active_power_diff')

In [None]:
from sklearn.preprocessing import RobustScaler
# Fit a RobustScaler on the selected feature columns and transform them in one step.
scaler = RobustScaler()
X_scaled = scaler.fit_transform(df[features])

In [None]:
from sklearn.ensemble import IsolationForest

# Fixed random_state keeps the forest reproducible between runs.
iforest_params = {"contamination": 0.005, "random_state": 42}
model = IsolationForest(**iforest_params)

# One label per row; rows labelled -1 are treated as anomalies further down.
anomaly_labels = model.fit_predict(X_scaled)
df['anomaly_multi'] = anomaly_labels

In [None]:
# 1. Binary anomaly flag plus a run id: the id increments every time the flag
#    changes, so each stretch of consecutive equal flags shares one id.
is_outlier = df['anomaly_multi'] == -1
df['anomaly_flag'] = is_outlier.astype(int)
flag_changed = df['anomaly_flag'].diff(1) != 0
df['anomaly_group'] = flag_changed.cumsum()

# 2. Group the flagged rows by run id and keep runs with at least 10 rows.
grouped = df[df['anomaly_flag'] == 1].groupby('anomaly_group')
run_sizes = grouped.size()
valid_anomaly_groups = list(run_sizes[run_sizes >= 10].index)

# 3. Store the filtered labels: -1 inside a kept run, 0 everywhere else.
in_valid_run = df['anomaly_group'].isin(valid_anomaly_groups)
df['anomaly_multi_filtered'] = 0
df.loc[in_valid_run, 'anomaly_multi_filtered'] = -1

# 4. Report how many flagged rows the filter removed.
before = df['anomaly_multi'].eq(-1).sum()
after = df['anomaly_multi_filtered'].eq(-1).sum()
print(
    f"Before filtering: {before}",
    f"After filtering : {after}",
    f"Filtered out    : {before - after}",
    sep="\n",
)

In [None]:
# Count of rows labelled -1 by the model and their share of all rows.
total_rows = len(df)
num_multi = df['anomaly_multi'].eq(-1).sum()
outlier_share = num_multi / total_rows
print(f"다변량 이상치 개수: {num_multi} / 전체: {total_rows} → {outlier_share:.4%}")

In [None]:
import matplotlib.pyplot as plt

# Rows whose filtered anomaly label is -1.
flagged = df[df['anomaly_multi_filtered'] == -1]

overview_fig, overview_ax = plt.subplots(figsize=(15, 4))

# Full active-power series drawn underneath (zorder=1) ...
overview_ax.plot(
    df.index, df['active_power'],
    label='active_power', alpha=0.7, linewidth=1.2, zorder=1,
)
# ... with the anomalous rows overlaid as red markers (zorder=2).
overview_ax.scatter(
    flagged.index, flagged['active_power'],
    color='red', label='Multivariate Anomaly', s=10, zorder=2,
)

overview_ax.set_title("Multivariate Anomalies on Active Power Time Series")
overview_ax.set_xlabel("Time")
overview_ax.set_ylabel("Active Power")
overview_ax.legend()
plt.show()

In [None]:
# Promote date_time to the index so rows can be sliced by timestamp.
# The guard makes this cell safe to re-run: once the column has become the
# index, an unguarded set_index('date_time') would raise KeyError.
if 'date_time' in df.columns:
    df.set_index('date_time', inplace=True)

# Two-hour window to inspect in detail (label-based slice on the datetime index).
df_zoom = df.loc['2023-10-02 20:00:00':'2023-10-02 22:00:00']

# Anomalous rows inside the window, computed once and reused for x and y.
zoom_flagged = df_zoom[df_zoom['anomaly_multi_filtered'] == -1]

plt.figure(figsize=(15, 4))
plt.plot(df_zoom.index, df_zoom['active_power'], label='active_power', alpha=0.7, linewidth=1.2)

plt.scatter(
    zoom_flagged.index,
    zoom_flagged['active_power'],
    color='red', label='Multivariate Anomaly', s=20
)

plt.title("Zoomed View: Multivariate Anomalies (20:00 ~ 22:00)")
plt.xlabel("Time")
plt.ylabel("Active Power")
plt.legend()
plt.show()

In [None]:
# NOTE: this whole cell is disabled (commented-out) exploratory code.
# If re-enabled it would project X_scaled onto two PCA components and draw a
# scatter plot coloured by the raw anomaly label (1 = blue, -1 = red).

# from sklearn.decomposition import PCA
# import seaborn as sns

# pca = PCA(n_components=2)
# X_pca = pca.fit_transform(X_scaled)

# df['pca1'] = X_pca[:, 0]
# df['pca2'] = X_pca[:, 1]

# sns.scatterplot(x='pca1', y='pca2', data=df, hue='anomaly_multi', palette={1: 'blue', -1: 'red'}, s=10)
# plt.title("PCA Projection of Multivariate Anomaly Detection")
# plt.show()

JSON 라벨과 함께 패턴 분석

In [None]:
import os
import json
import pandas as pd

# Directory holding one label JSON per channel.
json_dir = "/Users/sungwoo/Downloads/matrix-nilm-test/json/house_009"

# Maps "<file name without .json>" -> DataFrame with 'start' / 'end' datetime columns.
appliance_on_dict = {}

json_suffix = ".json"

# os.listdir() returns names in arbitrary order; sorting keeps the dict's
# insertion order (and anything derived from it, such as colour assignment)
# stable between runs and machines.
for fname in sorted(os.listdir(json_dir)):
    # Only JSON files, and skip any file whose name contains "ch01".
    if not fname.endswith(json_suffix) or "ch01" in fname:
        continue

    with open(os.path.join(json_dir, fname), "r", encoding="utf-8") as f:
        data = json.load(f)

    # A missing 'active_inactive' key is treated as "no intervals".
    label_list = data['labels'].get('active_inactive', [])

    # Each usable entry carries at least [start, end]; extra fields are ignored.
    rows = [
        {"start": pd.to_datetime(entry[0]), "end": pd.to_datetime(entry[1])}
        for entry in label_list
        if len(entry) >= 2
    ]

    if rows:
        # Strip only the trailing suffix (str.replace would remove every
        # ".json" occurrence in the name, not just the extension).
        appliance_key = fname[:-len(json_suffix)]
        appliance_on_dict[appliance_key] = pd.DataFrame(rows)

In [None]:
import plotly.graph_objects as go
from itertools import cycle
import plotly.colors as pc

fig = go.Figure()

# Full active-power series.
fig.add_trace(go.Scatter(
    x=df.index, y=df['active_power'],
    mode='lines', name='Main Active Power', line=dict(color='royalblue')
))

# Anomalous rows (filtered label == -1) as red markers.
anomalies = df[df['anomaly_multi_filtered'] == -1]
fig.add_trace(go.Scatter(
    x=anomalies.index, y=anomalies['active_power'],
    mode='markers', name='Anomaly',
    marker=dict(color='red', size=4)
))

# One colour per appliance, cycling through three qualitative palettes.
colors = cycle(pc.qualitative.Set3 + pc.qualitative.Pastel1 + pc.qualitative.Pastel2)

# Height of every shaded block; computed once instead of per interval.
power_max = df['active_power'].max()

# Draw ALL ON intervals of an appliance as a single trace so that it gets
# exactly one legend entry and one click toggles every interval together.
# With fill='toself', a None entry starts a new, separately closed polygon.
for appliance, group_df in appliance_on_dict.items():
    color = next(colors)

    block_x, block_y = [], []
    for row in group_df.itertuples():
        # Rectangle outline start -> end -> end -> start -> start, then a gap.
        block_x += [row.start, row.end, row.end, row.start, row.start, None]
        block_y += [0, 0, power_max, power_max, 0, None]

    fig.add_trace(go.Scatter(
        x=block_x,
        y=block_y,
        fill='toself',
        fillcolor=color,
        line=dict(width=0),
        name=appliance,
        mode='lines',
        opacity=0.25,
        showlegend=True  # single trace per appliance -> single legend entry
    ))

fig.update_layout(
    title="Main Active Power with Anomalies and Appliance ON Times (Legend Toggle Works)",
    xaxis_title="Time",
    yaxis_title="Power (W)",
    legend=dict(orientation="v", itemsizing='constant')
)

fig.show()

In [None]:
import plotly.io as pio

# Enlarge the existing figure and retitle it for a full-screen view.
fullscreen_layout = dict(
    title="Main Active Power with Anomalies and Appliance ON Times (FULL SCREEN)",
    xaxis_title="Time",
    yaxis_title="Power (W)",
    width=1920,
    height=1000,
    legend=dict(orientation="v", itemsizing='constant'),
)
fig.update_layout(**fullscreen_layout)

# Switch the default renderer to the browser and display the figure there.
# Note: this changes the default for every figure shown afterwards as well.
pio.renderers.default = 'browser'
fig.show()