In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from pylab import rcParams
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Dense
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras import regularizers
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_recall_curve
from sklearn.metrics import recall_score, classification_report, auc, roc_curve
from numpy.random import seed



In [2]:
# 랜덤 시드 고정
seed(1)
tf.random.set_seed(2)

# 설정 변수들
SEED = 123  # 랜덤 선택을 위한 시드
DATA_SPLIT_PCT = 0.2  # 데이터 분할 비율
LABELS = ["Normal", "Break"]  # 레이블

# 그래프 크기 설정
rcParams['figure.figsize'] = 8, 6

In [3]:
# 데이터셋 불러오기
df = pd.read_csv("C:/Users/Administrator/dataset/ae_data.csv")

# 특정 열 제거 ('time'과 범주형 열)
df = df.drop(['time', 'x28', 'x61'], axis=1)

df

Unnamed: 0,y,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x51,x52,x53,x54,x55,x56,x57,x58,x59,x60
0,0,0.376665,-4.596435,-4.095756,13.497687,-0.118830,-20.669883,0.000732,-0.061114,-0.059966,...,29.984624,10.091721,0.053279,-4.936434,-24.590146,18.515436,3.473400,0.033444,0.953219,0.006076
1,0,0.475720,-4.542502,-4.018359,16.230659,-0.128733,-18.758079,0.000732,-0.061114,-0.059966,...,29.984624,10.095871,0.062801,-4.937179,-32.413266,22.760065,2.682933,0.033536,1.090502,0.006083
2,0,0.363848,-4.681394,-4.353147,14.127997,-0.138636,-17.836632,0.010803,-0.061114,-0.030057,...,29.984624,10.100265,0.072322,-4.937924,-34.183774,27.004663,3.537487,0.033629,1.840540,0.006090
3,0,0.301590,-4.758934,-4.023612,13.161566,-0.148142,-18.517601,0.002075,-0.061114,-0.019986,...,29.984624,10.104660,0.081600,-4.938669,-35.954281,21.672449,3.986095,0.033721,2.554880,0.006097
4,0,0.265578,-4.749928,-4.333150,15.267340,-0.155314,-17.505913,0.000732,-0.061114,-0.030057,...,29.984624,10.109054,0.091121,-4.939414,-37.724789,21.907251,3.601573,0.033777,1.410494,0.006105
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18393,0,-0.877441,0.786430,0.406426,135.301215,0.112295,26.300392,-0.159185,0.058823,-0.080108,...,29.984624,-0.773514,-0.752385,2.682413,6.944644,-37.795661,-0.860218,0.010220,0.895685,-0.011242
18394,0,-0.843988,0.633086,0.561918,133.228949,0.141332,25.678597,-0.159185,0.058823,-0.080108,...,29.984624,-0.773514,-0.752385,2.683338,0.507755,-39.357199,-0.915698,0.010620,0.175348,-0.011235
18395,0,-0.826547,0.450126,0.334582,134.977973,0.170370,25.056801,-0.159185,0.048752,-0.080108,...,29.984624,-0.773514,-0.752385,2.684263,2.809146,-39.357199,-1.409596,0.013323,0.621020,-0.011228
18396,0,-0.822843,0.419383,0.387263,135.658942,0.199422,24.435005,-0.159185,0.048752,-0.080108,...,29.984624,-0.773514,-0.752385,2.685189,2.164859,-39.357199,-0.860218,0.012888,1.390902,-0.011221


In [4]:
# 데이터셋 분할: 학습, 검증, 테스트 데이터
df_train, df_test = train_test_split(df, test_size=DATA_SPLIT_PCT, random_state=SEED)
df_train, df_valid = train_test_split(df_train, test_size=DATA_SPLIT_PCT, random_state=SEED)

# 레이블 별로 데이터 분리 (정상 데이터와 이상 데이터)
df_train_0 = df_train[df_train['y'] == 0]
df_train_1 = df_train[df_train['y'] == 1]


In [5]:
# 레이블 제거 및 피처 데이터만 남김
df_train_0_x = df_train_0.drop(['y'], axis=1)
df_train_1_x = df_train_1.drop(['y'], axis=1)

df_valid_0 = df_valid[df_valid['y'] == 0]
df_valid_0_x = df_valid_0.drop(['y'], axis=1)

df_test_0 = df_test[df_test['y'] == 0]
df_test_0_x = df_test_0.drop(['y'], axis=1)

In [6]:
# 데이터 표준화 (스케일링)
scaler = StandardScaler().fit(df_train_0_x)
df_train_0_x_rescaled = scaler.transform(df_train_0_x)
df_valid_0_x_rescaled = scaler.transform(df_valid_0_x)
df_test_0_x_rescaled = scaler.transform(df_test_0_x)

df_valid_x_rescaled = scaler.transform(df_valid.drop(['y'], axis=1))
df_test_x_rescaled = scaler.transform(df_test.drop(['y'], axis=1))

In [7]:
# 오토인코더 모델 설정
input_dim = df_train_0_x_rescaled.shape[1]  # 입력 차원
encoding_dim = 32  # 임의로 설정한 압축 차원
hidden_dim = int(encoding_dim / 2)  # 숨겨진 차원
learning_rate = 1e-3  # 학습률

# 입력 레이어
input_layer = Input(shape=(input_dim, ))

# 인코더 구성
encoder = Dense(encoding_dim, activation="relu", 
                activity_regularizer=regularizers.l1(learning_rate))(input_layer)
encoder = Dense(hidden_dim, activation="relu")(encoder)

# 디코더 구성
decoder = Dense(hidden_dim, activation="relu")(encoder)
decoder = Dense(encoding_dim, activation="relu")(decoder)
decoder = Dense(input_dim, activation="linear")(decoder)

# 오토인코더 모델
autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.summary()

# 모델 컴파일
autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='adam')


In [8]:
import os
import shutil

# 로그 디렉토리 경로
log_dir = './logs'

# 기존 로그 디렉토리 삭제
if os.path.exists(log_dir):
    shutil.rmtree(log_dir)

# 새 로그 디렉토리 생성
os.makedirs(log_dir)


In [10]:
import os
from keras.layers import Input, Dense
from keras.models import Model
from keras.callbacks import ModelCheckpoint, TensorBoard

# 로그 디렉토리 생성
log_dir = './logs'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)

# 입력 차원 정의
input_dim = df_train_0_x_rescaled.shape[1]  # 예측 변수의 수
encoding_dim = 32  # 인코딩 차원
hidden_dim = int(encoding_dim / 2)  # 히든 차원

# 오토인코더 모델 정의
input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation="relu")(input_layer)
encoder = Dense(hidden_dim, activation="relu")(encoder)
decoder = Dense(hidden_dim, activation="relu")(encoder)
decoder = Dense(encoding_dim, activation="relu")(decoder)
decoder = Dense(input_dim, activation="linear")(decoder)
autoencoder = Model(inputs=input_layer, outputs=decoder)

# 모델 요약 출력
autoencoder.summary()

# 모델 컴파일
autoencoder.compile(metrics=['accuracy'],
                    loss='mean_squared_error',
                    optimizer='adam')

# 모델 체크포인트 및 TensorBoard 콜백 설정
cp = ModelCheckpoint(filepath="autoencoder_classifier.keras", save_best_only=True, verbose=0)
tb = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=True, write_images=True)

# 모델 학습
nb_epoch = 200  # 에포크 수
batch_size = 128  # 배치 사이즈

history = autoencoder.fit(df_train_0_x_rescaled, df_train_0_x_rescaled,
                          epochs=nb_epoch,
                          batch_size=batch_size,
                          shuffle=True,
                          validation_data=(df_valid_0_x_rescaled, df_valid_0_x_rescaled),
                          verbose=1,
                          callbacks=[cp, tb])


FailedPreconditionError: {{function_node __wrapped__CreateSummaryFileWriter_device_/job:localhost/replica:0/task:0/device:CPU:0}} ./logs is not a directory [Op:CreateSummaryFileWriter] name: 