In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os 
from pyod.models.iforest import IForest
import numpy as np
from sklearn.metrics import classification_report
from pyod.models.hbos import HBOS
from pyod.models.knn import KNN
from pyod.models.feature_bagging import FeatureBagging
from sklearn.metrics import accuracy_score
import warnings
from pyod.models.auto_encoder import AutoEncoder

from scipy.stats import ConstantInputWarning

warnings.simplefilter("ignore", ConstantInputWarning)


In [3]:
# Location of the cleaned input CSV. Set the CLEANED_CSV_PATH environment
# variable to run the notebook on another machine; when it is unset the
# original author's local path is used, so existing runs are unaffected.
# (The space before the final "/" in the directory name is part of the path.)
DATA_PATH = os.environ.get(
    "CLEANED_CSV_PATH",
    "/Users/saikarthik/Desktop/Nexus-S1 Deakin /cleaned.csv",
)
df = pd.read_csv(DATA_PATH)

In [None]:
# Inspect the column labels of the freshly loaded frame.
df.columns

In [4]:
# Remove the 'Unnamed: 0' column (presumably an index column written out when
# the CSV was saved -- confirm against the file).
# Reassigning with errors='ignore' instead of inplace=True keeps this cell safe
# to re-run: a second execution is a no-op rather than a KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')


In [None]:
# Show the column labels again to confirm the result of the preceding drop.
df.columns

In [5]:
# Mean and standard deviation of the 'value' column; these feed the
# rule-based anomaly thresholds computed in the next cell.
mean_value, std_dev_value = df['value'].mean(), df['value'].std()

In [6]:
# Cut-offs for the rule-based labels: 3 standard deviations below the mean and
# 1.25 standard deviations above it.
# NOTE(review): the multipliers are asymmetric and not explained in the
# notebook -- confirm they are intentional.
threshold_lower = mean_value - 3 * std_dev_value
threshold_upper = mean_value + 1.25 * std_dev_value

In [7]:
def label_anomalies(value, upper=None, lower=None):
    """Return 1 when ``value`` lies outside the accepted band, otherwise 0.

    Parameters
    ----------
    value : number
        Observation to classify.
    upper : number, optional
        Upper cut-off. When omitted, the notebook-level ``threshold_upper``
        is looked up at call time.
    lower : number, optional
        Lower cut-off. When omitted, the notebook-level ``threshold_lower``
        is looked up at call time.

    Returns
    -------
    int
        1 if ``value > upper`` or ``value < lower``; 0 otherwise. Values equal
        to a cut-off, and NaN (for which both comparisons are false), give 0.
    """
    # Fall back to the notebook globals only when no explicit bound is given,
    # so existing single-argument calls such as Series.apply(label_anomalies)
    # behave exactly as before.
    if upper is None:
        upper = threshold_upper
    if lower is None:
        lower = threshold_lower
    return int(value > upper or value < lower)



In [8]:
# Label every row by running label_anomalies over the 'value' column
# element by element; the 0/1 result is stored as 'true_labels'.
df['true_labels'] = df['value'].map(label_anomalies)


In [9]:
# Class balance of the rule-based labels (0 = normal, 1 = anomaly).
df.loc[:, "true_labels"].value_counts()

true_labels
0    22042
1      653
Name: count, dtype: int64

# Ensemble-Based

## Isolation Forest 

In [None]:
# Feature matrix for the detectors: the 'value' column alone, kept 2-D
# (one row per observation, one column) by the double-bracket selection.
X = df[['value']].to_numpy()


In [None]:
# Isolation Forest detector with a fixed seed so repeated runs agree.
# NOTE(review): contamination=0.1 sets the expected outlier share to 10%, while
# the rule-based labels above mark 653 of 22,695 rows (about 2.9%) -- confirm
# that this gap is intended before comparing the two.
clf = IForest(random_state=42, contamination=0.1)

In [None]:
# Fit the detector on the single-feature matrix; no labels are passed.
clf.fit(X)


In [None]:
# Binary outlier flags for the same rows the model was fitted on
# (compared against 1 for "anomaly" in the plot below).
y_pred_if = clf.predict(X)

In [None]:
# Raw outlier scores from the fitted Isolation Forest for the same rows.
y_scores_if = clf.decision_function(X)

In [None]:
# Overlay the Isolation Forest detections and the rule-based labels on the
# raw series.
plt.figure(figsize=(12, 6))
plt.plot(df['timestamp'], df['value'], label='Time Series Data')
# Red crosses: rows flagged by the Isolation Forest (prediction == 1).
# The legend entry used to read "(HBOS)", a copy-paste slip that mislabelled
# this figure; it now names the detector actually plotted.
plt.scatter(df['timestamp'][y_pred_if == 1], df['value'][y_pred_if == 1],
            color='red', marker='x', label='Detected Anomalies (Isolation Forest)')
# Blue dots: rows marked anomalous by the mean/std threshold rule.
plt.scatter(df['timestamp'][df['true_labels'] == 1], df['value'][df['true_labels'] == 1],
            color='blue', marker='o', label='Ground Truth Anomalies')
plt.title('Time Series Anomaly Detection using Isolation Forest (PyOD)')
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.legend()
plt.show()

In [None]:
# Per-class precision / recall / F1 of the Isolation Forest flags, measured
# against the rule-based labels.
print(classification_report(y_true=df['true_labels'], y_pred=y_pred_if))


In [None]:
# Share of rows where the Isolation Forest flag equals the rule-based label,
# displayed as a percentage.
accuracy = accuracy_score(y_true=df['true_labels'], y_pred=y_pred_if)
accuracy * 100

# Proximity Based 

## HBOS

In [None]:
# HBOS detector fitted on the same single-feature matrix.
# contamination is written out to match the Isolation Forest cell; 0.1 is
# PyOD's documented default, so the fitted model is the same as with HBOS().
clf_hbos = HBOS(contamination=0.1)
clf_hbos.fit(X)

In [None]:
# Binary outlier flags from HBOS for the rows it was fitted on
# (compared against 1 for "anomaly" in the plot below).
y_predict_hbos = clf_hbos.predict(X)

In [None]:
# Raw outlier scores from the fitted HBOS model for the same rows.
y_scores_hbos = clf_hbos.decision_function(X)

In [None]:
# Raw series with the HBOS detections and the rule-based labels overlaid.
plt.figure(figsize=(12, 6))
plt.plot(df['timestamp'], df['value'], label='Time Series Data')
# Red crosses: rows HBOS flagged (prediction == 1).
plt.scatter(
    df.loc[y_predict_hbos == 1, 'timestamp'],
    df.loc[y_predict_hbos == 1, 'value'],
    color='red',
    marker='x',
    label='Detected Anomalies (HBOS)',
)
# Blue dots: rows whose rule-based label is 1.
plt.scatter(
    df.loc[df['true_labels'] == 1, 'timestamp'],
    df.loc[df['true_labels'] == 1, 'value'],
    color='blue',
    marker='o',
    label='Ground Truth Anomalies',
)
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.title('Time Series Anomaly Detection using HBOS (PyOD)')
plt.legend()
plt.show()

In [None]:
# Per-class precision / recall / F1 of the HBOS flags, measured against the
# rule-based labels.
print(classification_report(y_true=df['true_labels'], y_pred=y_predict_hbos))


In [None]:
# Share of rows where the HBOS flag equals the rule-based label, displayed as
# a percentage. Note this rebinds the shared `accuracy` name.
accuracy = accuracy_score(y_true=df['true_labels'], y_pred=y_predict_hbos)
accuracy * 100

## KNN 

In [None]:
# k-nearest-neighbour detector fitted on the same single-feature matrix.
# contamination is written out to match the Isolation Forest cell; 0.1 is
# PyOD's documented default, so the fitted model is the same as with KNN().
clf_knn = KNN(contamination=0.1)
clf_knn.fit(X)

In [None]:
# Use the results PyOD stored while fitting rather than re-scoring X.
# For a neighbour-based detector, passing the training rows back through
# predict()/decision_function() lets every row match itself at distance 0,
# which lowers its score relative to the one computed during fit. labels_ and
# decision_scores_ are PyOD's documented outputs for the training data and
# also avoid a second nearest-neighbour search over X.
y_predict_knn = clf_knn.labels_
y_scores_knn = clf_knn.decision_scores_

In [None]:
# Raw series with the kNN detections and the rule-based labels overlaid.
plt.figure(figsize=(12, 6))
plt.plot(df['timestamp'], df['value'], label='Time Series Data')
# Red crosses: rows kNN flagged (prediction == 1).
plt.scatter(
    df.loc[y_predict_knn == 1, 'timestamp'],
    df.loc[y_predict_knn == 1, 'value'],
    color='red',
    marker='x',
    label='Detected Anomalies (kNN)',
)
# Blue dots: rows whose rule-based label is 1.
plt.scatter(
    df.loc[df['true_labels'] == 1, 'timestamp'],
    df.loc[df['true_labels'] == 1, 'value'],
    color='blue',
    marker='o',
    label='Ground Truth Anomalies',
)
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.title('Time Series Anomaly Detection using kNN (PyOD)')
plt.legend()
plt.show()

In [None]:
# Per-class precision / recall / F1 of the kNN flags, measured against the
# rule-based labels.
print(classification_report(y_true=df['true_labels'], y_pred=y_predict_knn))


In [None]:
# Share of rows where the kNN flag equals the rule-based label, displayed as
# a percentage. Note this rebinds the shared `accuracy` name.
accuracy = accuracy_score(y_true=df['true_labels'], y_pred=y_predict_knn)
accuracy * 100

# Linear Model

# Neural Network 