# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
from collections import Counter
import tensorflow as tf
from tensorflow.keras import Model, models, layers, optimizers, regularizers
from tensorflow.keras.callbacks import ModelCheckpoint

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Seed every global random source the notebook relies on, so that
# "Restart Kernel -> Run All" reproduces the same results.
seed = 1
np.random.seed(seed)      # NumPy's global RNG (scikit-learn uses it when random_state is None)
tf.random.set_seed(seed)  # TensorFlow / Keras global seed

# Load Data

In [2]:
# Read the two input tables from the current working directory.
# `train` is used to fit the autoencoder, `test` is scored further down.
train = pd.read_csv(filepath_or_buffer='2_only_normal_threshold02.csv')
test = pd.read_csv(filepath_or_buffer='2_test.csv')

In [3]:
# Remove the columns that must not be fed to the model.
# Rebinding the names (instead of inplace=True) together with errors='ignore'
# makes this cell idempotent: running it a second time is a no-op rather than
# a KeyError.
# NOTE(review): errors='ignore' also stays silent if a CSV never contained one
# of these columns -- confirm the expected schema if that matters.
train = train.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors='ignore')
test = test.drop(columns=['Unnamed: 0', 'Timestamp'], errors='ignore')

In [4]:
# Window configuration:
#   shift_by   -- negative look-back offset; its absolute value is used as the
#                 number of timesteps per window further down.
#   n_features -- number of columns in the training frame.
shift_by, n_features = -5, train.shape[1]

# Transform to Sequence Data

In [5]:
# This cell assigns the same two values as the cell before it.
n_features = len(train.columns)  # feature count == number of training columns
shift_by = -5                    # negative look-back offset

In [6]:
# An LSTM expects 3-D input shaped (samples, timesteps, features), so the flat
# table is converted into overlapping fixed-length sequences.

def temporalize(X, timesteps):
    """Slice a 2-D table into overlapping look-back windows.

    Window ``i`` contains the ``timesteps`` consecutive rows found at
    positions ``i + 2`` through ``i + timesteps + 1`` of ``X``, and
    ``len(X) - timesteps - 1`` windows are produced.

    NOTE: with this alignment the first two rows of ``X`` never appear in any
    window. The alignment is kept as-is so that the number of samples (and the
    length of every prediction file derived from it) does not change.

    Rows are selected by position, so the result does not depend on the
    index labels of ``X``.

    Parameters
    ----------
    X : pandas.DataFrame or 2-D array-like
        One row per time point, one column per feature.
    timesteps : int
        Number of consecutive rows per window.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, timesteps, n_features)``. The result is
        always 3-D, including when there are no windows, a single window, or
        a single feature.
    """
    values = X.to_numpy() if hasattr(X, "to_numpy") else np.asarray(X)
    if values.ndim == 1:
        # Treat a 1-D input as a single-feature table.
        values = values.reshape(-1, 1)

    n_windows = max(len(values) - timesteps - 1, 0)

    # row_idx[i, j] is the position of the j-th row of window i.
    row_idx = np.arange(n_windows)[:, None] + np.arange(timesteps)[None, :] + 2
    return values[row_idx]

In [7]:
# The window length is the magnitude of the (negative) look-back offset.
timesteps = abs(shift_by)

# Temporalize: turn the training table into a 3-D array of windows and
# report its shape.
x = temporalize(train, timesteps=timesteps)
print(x.shape)

(167487, 5, 33)


# Split Train / Valid / Test

In [8]:
# Split into train, valid, and test.
# The test windows come from the separate test table; 20% of the training
# windows are held out for validation.
x_test = temporalize(test, timesteps)

# random_state=seed pins the shuffle so that every run produces the same
# train/valid partition.
# NOTE(review): consecutive windows share rows, so a shuffled split places
# near-identical windows on both sides -- confirm this is acceptable.
x_train, x_valid = train_test_split(x, test_size=0.2, random_state=seed)

print(len(x_train))
print(len(x_valid))
print(len(x_test))

133989
33498
105114


# Standardize

In [9]:
def flatten(X):
    """Reduce a 3-D array to 2-D by keeping only the last timestep.

    For input indexed as ``X[sample, timestep, feature]`` the result has one
    row per sample and one column per feature, taken from the final timestep
    of each sample. A new float64 array is returned.
    """
    last_step = X.shape[1] - 1
    return np.array(X[:, last_step, :], dtype=np.float64)

def scale(X, scaler):
    """Apply an already-fitted scaler to every timestep of every sample.

    The array is reshaped to 2-D (one row per sample/timestep pair), passed
    through ``scaler.transform`` once, and reshaped back. This assumes the
    scaler transforms each row independently, as the per-feature scalers
    imported in this notebook do.

    Parameters
    ----------
    X : numpy.ndarray
        Array indexed as ``X[sample, timestep, feature]``.
    scaler : object
        Fitted transformer exposing ``transform(2-D array) -> 2-D array``.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as ``X``. ``X`` itself is left
        untouched, so integer input is not truncated and the caller's
        unscaled data stays available.
    """
    X = np.asarray(X)
    if X.size == 0:
        # Nothing to transform; some scalers reject empty input.
        return X.astype(np.float64)

    width = X.shape[-1]
    flat = X.reshape(-1, width)
    return np.asarray(scaler.transform(flat)).reshape(X.shape)

In [10]:
# Fit the min-max scaler on the training split only, using the last timestep
# of each training window as the fitting sample.
scaler = MinMaxScaler()
scaler.fit(flatten(x_train))

# Apply the same fitted scaler to all three splits.
x_train_scaled = scale(X=x_train, scaler=scaler)
x_valid_scaled = scale(X=x_valid, scaler=scaler)
x_test_scaled = scale(X=x_test, scaler=scaler)

# Training LSTM Autoencoder

In [11]:
# Training hyper-parameters:
#   epochs -- maximum number of passes over the training windows
#   batch  -- mini-batch size
#   lr     -- learning rate handed to the optimizer
epochs, batch, lr = 10, 128, 1e-3

In [12]:
# Early stopping: watch the validation loss with a patience of 5 epochs.
callback = tf.keras.callbacks.EarlyStopping(patience=5, monitor="val_loss")

# LSTM autoencoder. The encoder compresses each (timesteps, n_features) window
# into one 32-unit vector; RepeatVector copies that vector once per timestep;
# the decoder mirrors the encoder and a time-distributed Dense layer maps each
# timestep back to n_features values.
lstm_ae = models.Sequential([
    # Encoder
    layers.LSTM(64, input_shape=(timesteps, n_features), return_sequences=True, activation='relu'),
    layers.LSTM(32, return_sequences=False, activation='relu'),
    layers.RepeatVector(timesteps),
    # Decoder
    layers.LSTM(32, return_sequences=True, activation='relu'),
    layers.LSTM(64, return_sequences=True, activation='relu'),
    layers.TimeDistributed(layers.Dense(n_features)),
])

lstm_ae.summary()

Metal device set to: Apple M1

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 5, 64)             25088     
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 repeat_vector (RepeatVector  (None, 5, 32)            0         
 )                                                               
                                                                 
 lstm_2 (LSTM)               (None, 5, 32)             8320      
                                                                 
 lstm_3 (LSTM)               (None, 5, 64)             24832     
                                                                 
 time_distributed (TimeDistr  (None, 5, 33)   

2022-05-05 23:28:44.098374: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-05-05 23:28:44.098482: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [13]:
# Compile with mean-squared-error loss and an Adam optimizer at learning rate `lr`.
lstm_ae.compile(optimizer=optimizers.Adam(learning_rate=lr), loss="mse")

# Fit as an autoencoder: the scaled windows serve as both input and target,
# for the training data and for the validation data alike.
history = lstm_ae.fit(
    x=x_train_scaled,
    y=x_train_scaled,
    batch_size=batch,
    epochs=epochs,
    callbacks=[callback],
    validation_data=(x_valid_scaled, x_valid_scaled),
)

Epoch 1/10


2022-05-05 23:28:44.382578: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-05-05 23:28:45.145242: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




2022-05-05 23:31:26.117972: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Threshold

In [363]:
# Reconstruction-error cut-off: a window (or feature group) whose RMSE is
# strictly greater than this value is labelled 1 in the cells below, else 0.
threshold = 0.19

In [15]:
# Display (rows, columns) of the test table as it stands in this cell.
test.shape

(105120, 33)

In [16]:
# Run the trained autoencoder on the scaled test windows to obtain their
# reconstructions.
test_predictions = lstm_ae.predict(x_test_scaled)

2022-05-05 23:58:00.915509: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


In [17]:
# Overall reconstruction error across all test windows.
# NOTE: a square root is applied, so the value stored in `mse` is the *root*
# mean squared error, despite the variable's name.
flat_shape = (
    test_predictions.shape[0],
    test_predictions.shape[1] * test_predictions.shape[2],
)
predicted_flat = test_predictions.reshape(flat_shape)
actual_flat = x_test_scaled.reshape(flat_shape)

mse = np.sqrt(np.mean(np.power(predicted_flat - actual_flat, 2)))
print(mse)

0.030624128731011255


In [364]:
# Label every test window: 1 when its reconstruction RMSE (taken over all
# timesteps and features of that window) is strictly above `threshold`,
# otherwise 0.
# Both tensors are flattened once and the per-window RMSE is computed in one
# vectorized step, instead of re-reshaping them on every loop iteration.
flat_shape = (
    test_predictions.shape[0],
    test_predictions.shape[1] * test_predictions.shape[2],
)
test_window_rmse = np.sqrt(
    np.mean(
        np.power(test_predictions.reshape(flat_shape) - x_test_scaled.reshape(flat_shape), 2),
        axis=1,
    )
)

# Plain Python list of 0/1 ints, one entry per test window.
predict = (test_window_rmse > threshold).astype(int).tolist()

In [365]:
# Wrap the 0/1 labels in a one-column frame named 'Prediction' and show how
# many windows fall into each class.
predict = pd.DataFrame(data=predict, columns=['Prediction'])
predict.value_counts()

Prediction
0             104978
1                136
dtype: int64

In [366]:
# Write the whole-window labels to disk, overwriting any existing file
# ('w' is to_csv's default mode). The file name carries the cut-off used
# (0_19 for threshold = 0.19).
predict.to_csv('2_predict_all_threshold 0_19.csv')

# Individual Calculation (개별 계산)

In [234]:
# Per-feature-group flags for features 0..11, taken three adjacent features
# at a time (groups start at feature 0, 1, ..., 9).
#
# For group i the columns i, i+1, i+2 are picked at each of the five offsets
# 0, 33, 66, 99, 132 (15 columns in total), and the row-wise RMSE over those
# columns is compared against `threshold`.
#
# NOTE(review): `check` is not defined anywhere in this notebook. Presumably
# it is a DataFrame of per-window reconstruction residuals laid out as
# 5 timesteps x 33 features -- confirm and define it before this cell.
#
# The results are still published as the globals check_1..check_10 and
# prediction_1..prediction_10, because the summing cell further down looks
# the prediction_* names up through globals().
for i in range(10):
    group_cols = [offset + i + k for offset in (0, 33, 66, 99, 132) for k in range(3)]
    group_rmse = np.sqrt(np.mean(np.power(check.iloc[:, group_cols], 2), axis=1))
    globals()[f'check_{i + 1}'] = group_rmse
    # Compare by position (not by index label) and store plain 0/1 ints.
    globals()[f'prediction_{i + 1}'] = (np.asarray(group_rmse) > threshold).astype(int).tolist()

In [235]:
def _flag_above(values, limit):
    """Return a list holding 1 where ``values`` is strictly above ``limit``, else 0."""
    return (np.asarray(values) > limit).astype(int).tolist()


# Features 10, 11 and 12 are scored on their own: each uses its column at the
# five offsets 0, 33, 66, 99, 132 and takes the row-wise RMSE over them.
# NOTE(review): `check` is not defined anywhere in this notebook -- confirm
# where it comes from.
temp_1 = np.sqrt(np.mean(np.power(check.iloc[:, [10, 43, 76, 109, 142]], 2), axis=1))
temp_2 = np.sqrt(np.mean(np.power(check.iloc[:, [11, 44, 77, 110, 143]], 2), axis=1))
temp_3 = np.sqrt(np.mean(np.power(check.iloc[:, [12, 45, 78, 111, 144]], 2), axis=1))

# One 0/1 list per feature, same length and order as the rows of `check`.
prediction_11 = _flag_above(temp_1, threshold)
prediction_12 = _flag_above(temp_2, threshold)
prediction_13 = _flag_above(temp_3, threshold)

In [236]:
# Count, per row, how many of the 13 individual checks fired: start from a
# column of zeros and add prediction_1 .. prediction_13 one after another.
prediction = pd.DataFrame([0] * len(temp_1))
for check_no in range(1, 14):
    prediction = prediction + pd.DataFrame(globals()[f'prediction_{check_no}'])

In [237]:
# Distribution of the per-row vote counts (how many checks fired per row).
prediction.value_counts()

0     86469
2     11481
10     3540
11     1411
1       921
12      438
3       191
5       156
4       137
7       120
8        76
6        65
13       64
9        45
dtype: int64

In [238]:
# Name the single column, then cap every count above 1 at 1. Counts of 0 and 1
# are left as they are, so the column ends up binary: 1 when at least one of
# the individual checks fired, 0 otherwise.
prediction.columns = ['Prediction']
prediction['Prediction'] = prediction['Prediction'].clip(upper=1)
prediction.value_counts()

Prediction
0             86469
1             18645
dtype: int64

In [239]:
# Write the combined per-feature labels to disk, overwriting any existing file
# ('w' is to_csv's default mode).
# NOTE(review): the file name says 0_3, while the only threshold assignment in
# this notebook is 0.19 -- confirm which cut-off produced this file.
prediction.to_csv('2_predict_threshold 0_3.csv')