In [None]:
# Import
import pandas as pd
import darts
from darts import TimeSeries
from darts.models import ExponentialSmoothing, ARIMA, Prophet
import matplotlib.pyplot as plt
from darts.utils.utils import SeasonalityMode
from darts.utils.statistics import plot_acf, plot_pacf, check_seasonality, extract_trend_and_seasonality, ModelMode, remove_trend, remove_from_series, stationarity_tests
import glob 
import os
from darts.utils.missing_values import fill_missing_values
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
import sys, os
from math import sqrt
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.seasonal import STL
from pyod.models.auto_encoder import AutoEncoder
from sklearn.metrics import f1_score

In [None]:
# Load one series from disk and put it on a regular one-minute grid.
# Only data/0.csv is read here; to merge every CSV under ./data instead, use:
# df = pd.concat(map(pd.read_csv, glob.glob(os.path.join("./data" , "*.csv"))), ignore_index= True)
# NOTE(review): interpolate() is applied to the whole frame, so the `label`
# column read by the F1 cell is presumably interpolated on filled rows too - confirm this is intended.
# NOTE(review): newer pandas releases reportedly deprecate the 'T' alias in favour of 'min' - confirm.
df = (
    pd.read_csv('data/0.csv')
    # Epoch seconds -> datetime, replacing the original column.
    .assign(timestamp=lambda raw: pd.to_datetime(raw['timestamp'], unit='s'))
    .set_index('timestamp')
    # Conform the index to minute frequency ...
    .asfreq(freq='T')
    # ... then fill the resulting gaps with interpolate()'s default settings.
    .interpolate()
)

In [None]:
# Downsample the minute-level frame to hourly bins and average each column per bin.
hourly_bins = df.resample('H')
df_hourly = hourly_bins.mean()

In [None]:
# Standardise the minute-level `value` column. The scaler is given a
# 2-D (n_samples, 1) array, hence the added trailing axis.
scaler = StandardScaler()
value_column = df.value.values[:, np.newaxis]
np_scaled = scaler.fit_transform(value_column)
# Wrap the scaled array in a DataFrame (default integer index, single column 0).
data = pd.DataFrame(np_scaled)

In [None]:
# Build a darts TimeSeries from the hourly frame, using its `value` column.
series = TimeSeries.from_dataframe(df_hourly, value_cols='value')
# Hold out the final fifth of the hourly points for validation; everything
# before that is the training series.
n_val = round(len(df_hourly) / 5)
train = series[:-n_val]
val = series[-n_val:]

## AutoEncoder

In [None]:
# Fit a PyOD AutoEncoder on the standardised values.
# NOTE(review): no seed is passed here, so scores may vary between runs while a
# later cell applies a hard-coded score cut-off - confirm reproducibility.
hidden_layer_sizes = [1, 10, 10, 1]
clf = AutoEncoder(hidden_neurons=hidden_layer_sizes)
clf.fit(data)

In [None]:
# Outlier scores stored on the fitted model for the data it was trained on.
y_train_scores = clf.decision_scores_
# Score `data` again (the same frame the model was fitted on) and keep the
# result as a pandas Series.
y_test_scores = pd.Series(clf.decision_function(data))

# Distribution of the scores, with automatic bin selection.
fig, ax = plt.subplots()
ax.hist(y_test_scores, bins='auto')
ax.set_title("Histogram for Model Clf1 Anomaly Scores")
plt.show()

In [None]:
# Score cut-off separating the two clusters: scores below it map to cluster 0,
# everything else to cluster 1.
# NOTE(review): presumably chosen by eye from the score histogram - confirm.
SCORE_THRESHOLD = 1.85

# Work on a copy so the scaled input frame `data` is left untouched.
df_test = data.copy()
df_test['score'] = y_test_scores
df_test['cluster'] = np.where(df_test['score'] < SCORE_THRESHOLD, 0, 1)

# Only the last bare expression of a cell is rendered, so the cluster sizes
# are printed explicitly; previously this result was silently discarded.
print(df_test['cluster'].value_counts())
# Per-cluster means of the scaled value and the score (shown as the cell output).
df_test.groupby('cluster').mean()

In [None]:
# F1 score of the thresholded cluster flags against the `label` column of `df`.
# NOTE(review): astype(int) truncates, so any fractional labels (e.g. produced
# by the earlier interpolate() call) would become 0 - confirm labels are 0/1 here.
y_true = df['label'].values.astype(int)
y_pred = df_test['cluster'].values
f1_score(y_true, y_pred)