In [None]:
## One-Class SVM and Isolation Forest
# LOF (Local Outlier Factor): Detects anomalies by evaluating the local density deviation of data points.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
import plotly.graph_objects as go
import plotly.subplots as sp
from sklearn.ensemble import IsolationForest
from plotly.subplots import make_subplots
from plot_anomaly import multivariate_anomaly_plot

# Read the unlabeled time-series CSV. The 'datetime' column is parsed as
# dates and becomes the index of the resulting DataFrame.
data = pd.read_csv(
    "times_series_data_no_labels.csv",
    parse_dates=['datetime'],
    index_col='datetime',
)

# Display summary statistics of the loaded columns as the cell output.
data.describe()

## Raw input

In [None]:
# Isolation Forest on the two raw channels. `contamination` sets the expected
# share of outliers (0.5 %); the fixed seed keeps the result repeatable.
iso_forest = IsolationForest(contamination=0.005, random_state=42)

# Fit and label every row in one step: -1 marks an anomaly, 1 a normal point.
raw_readings = data[['data_0', 'data_1']]
data['anomaly'] = iso_forest.fit_predict(raw_readings)

# Boolean flag derived from the -1/1 labels.
data['is_anomaly'] = data['anomaly'] == -1

# Project plotting helper; presumably reads the flag columns added above (confirm).
multivariate_anomaly_plot(data)

## Isolation forest - Feature engineering

In [None]:
# Reload the CSV so this section starts from a frame without the columns
# added by the previous cell; 'datetime' is parsed and used as the index.
data = pd.read_csv(
    "times_series_data_no_labels.csv",
    parse_dates=['datetime'],
    index_col='datetime',
)

# Display summary statistics of the freshly loaded columns.
data.describe()

In [None]:
from sklearn.preprocessing import StandardScaler

# Rolling-window length in rows.
# NOTE(review): 288 presumably equals one day of 5-minute samples; confirm.
window = 288

# Calendar features taken from the datetime index; 'timestamp' is the number
# of minutes elapsed since midnight.
data['hour'] = data.index.hour
data['minute'] = data.index.minute
data['timestamp'] = 60 * data['hour'] + data['minute']

sensor_1 = data['data_0']
sensor_2 = data['data_1']

# Lagged readings: the value one and two rows earlier for each channel.
data['lag1_sensor1'] = sensor_1.shift(1)
data['lag1_sensor2'] = sensor_2.shift(1)
data['lag2_sensor1'] = sensor_1.shift(2)
data['lag2_sensor2'] = sensor_2.shift(2)

# Rolling mean / standard deviation over `window` rows. The first
# `window - 1` rows are NaN, so the later dropna() removes those rows even
# though the rolling columns are currently commented out of `features`.
rolling_1 = sensor_1.rolling(window=window)
data['rolling_mean_sensor1'] = rolling_1.mean()
data['rolling_std_sensor1'] = rolling_1.std()

rolling_2 = sensor_2.rolling(window=window)
data['rolling_mean_sensor2'] = rolling_2.mean()
data['rolling_std_sensor2'] = rolling_2.std()

def night_time_indicator(dt):
    """Return 0 for night hours (23:00-03:59) and 1 for every other hour.

    Parameters
    ----------
    dt : object with an integer ``hour`` attribute (e.g. a timestamp).

    Returns
    -------
    int
        0 when ``dt.hour`` is 23 or later, or earlier than 4; otherwise 1.
    """
    is_night = dt.hour >= 23 or dt.hour < 4
    return 0 if is_night else 1
    

def ramp_up_down_time_indicator(dt):
    """Return a time-of-day code that singles out the two ramp windows.

    The previous conditions (``4 <= dt.hour or dt.hour < 5``) were true for
    every hour, so the function always returned 3. Chained comparisons now
    restrict each ramp code to its one-hour window.

    Outside the ramp windows the function returns the plain night/day code
    instead of ``None``: the notebook writes this function's output into the
    ``daytime_indicator`` column and then drops rows containing NaN, so a
    missing value here would discard most of the data.

    Parameters
    ----------
    dt : object with an integer ``hour`` attribute (e.g. a timestamp).

    Returns
    -------
    int
        3 for 04:00-04:59 (presumably the ramp-up window; confirm),
        2 for 22:00-22:59 (presumably the ramp-down window; confirm),
        0 for night hours (23:00-03:59),
        1 for all remaining hours.
    """
    hour = dt.hour
    if 4 <= hour < 5:
        return 3
    if 22 <= hour < 23:
        return 2
    # Same night/day coding as night_time_indicator, inlined so that this
    # function is self-contained.
    if hour >= 23 or hour < 4:
        return 0
    return 1

# Encode each row's time of day by mapping the indicator functions over the
# datetime index.
row_times = data.index
data['daytime_indicator'] = row_times.map(night_time_indicator)
# NOTE(review): this writes to the same column as the line above, so the
# day/night codes are overwritten and only the ramp function's output
# remains in 'daytime_indicator'. Confirm this is intended.
data['daytime_indicator'] = row_times.map(ramp_up_down_time_indicator)

# Remove, in place, every row that has a NaN in any column.
data.dropna(inplace=True)

# Columns fed to the model. Candidates currently left out: "hour", "minute",
# "timestamp", and the four rolling mean/std columns.
features = [
    'data_0', 'data_1',
    'lag1_sensor1', 'lag1_sensor2',
    'lag2_sensor1', 'lag2_sensor2',
    "daytime_indicator",
]

# Standardise the selected columns. The scaled values are wrapped in a new
# DataFrame with default integer index and column labels.
scaler = StandardScaler()
np_scaled = scaler.fit_transform(data[features])
data_scaled = pd.DataFrame(np_scaled)

# Isolation Forest with an expected outlier share of 0.5 % and a fixed seed.
clf = IsolationForest(contamination=0.005, random_state=42).fit(data_scaled)

# predict() returns -1 for anomalies and 1 for normal points.
data['anomaly'] = clf.predict(data_scaled)
data['is_anomaly'] = data['anomaly'] == -1

# Project plotting helper; presumably reads the flag columns added above (confirm).
multivariate_anomaly_plot(data)

## Another implementation of isolation forest

In [None]:
# Reload the CSV to start this section from the original columns only;
# 'datetime' is parsed as dates and used as the index.
data = pd.read_csv(
    "times_series_data_no_labels.csv",
    parse_dates=['datetime'],
    index_col='datetime',
)

# Display summary statistics of the freshly loaded columns.
data.describe()

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every input column of the frame. The label columns this cell
# adds further down are excluded, so re-running the cell does not feed its
# own previous output back into the model (on a first run nothing is dropped).
model_input = data.drop(columns=['anomaly', 'is_anomaly'], errors='ignore')

scaler = StandardScaler()
np_scaled = scaler.fit_transform(model_input)

# Scaled values wrapped in a new DataFrame with default integer labels.
data_scaled = pd.DataFrame(np_scaled)

# Isolation forest
# Expected share of outliers in the data (0.5 %).
outliers_fraction = 0.005
# A fixed random_state makes the flagged points reproducible between runs,
# matching the seed used by the other Isolation Forest cells in this notebook.
ifo = IsolationForest(contamination=outliers_fraction, random_state=42)

ifo.fit(data_scaled)
# predict() returns -1 for anomalies and 1 for normal points.
data['anomaly'] = ifo.predict(data_scaled)

# Boolean flag derived from the -1/1 labels.
data['is_anomaly'] = data['anomaly'] == -1

# Project plotting helper; presumably reads the flag columns added above (confirm).
multivariate_anomaly_plot(data)