# Anomaly Detection using a TensorFlow Autoencoder

## Download images

In [None]:
# Recursively copy every object under the given S3 prefix into the local ./normal directory.
# Replace <Bucket> and <Prefix> with the location of your images before running.
# NOTE(review): the loading cells below read from ./train/ and ./test/, not ./normal —
# confirm how the downloaded images are meant to be arranged into those folders.
!aws s3 cp 's3://<Bucket>/<Prefix>/' ./normal --recursive

## Load data

### train data

In [None]:
from PIL import Image
import numpy as np
import os

size = 512  # every image is resized to size x size pixels


def _load_images(paths, side):
    """Load image files into one float32 array with values scaled to [0, 1].

    Each image is resized to ``side`` x ``side`` and then converted to 3-channel
    RGB, so grayscale, palette and RGBA files all produce the same
    (side, side, 3) layout instead of breaking the stacked array.

    Args:
        paths: iterable of image file paths.
        side: target width and height in pixels.

    Returns:
        Array of shape (len(paths), side, side, 3); an empty array when
        ``paths`` is empty.
    """
    images = []
    for path in paths:
        # The context manager closes the file handle once the pixels are read.
        with Image.open(path) as picture:
            rgb = picture.resize((side, side)).convert('RGB')
        images.append(np.array(rgb).astype('float32') / 255)
    return np.array(images)


dirname = './train/'
# Keep regular files only: os.listdir also returns sub-directories (for example a
# `.ipynb_checkpoints` folder), which Image.open cannot read.
filelist = np.array([
    os.path.join(dirname, entry)
    for entry in os.listdir(dirname)
    if os.path.isfile(os.path.join(dirname, entry))
])
num_all = len(filelist)
num_test = int(num_all*0.2)  # 20% of the files are held out for validation

# Shuffle the indices once, then split them into validation and training parts.
id_all   = np.random.choice(num_all, num_all, replace=False)
id_valid  = id_all[0:num_test]
id_train = id_all[num_test:num_all]

file_train = filelist[id_train]
file_valid = filelist[id_valid]

train_data = _load_images(file_train, size)
valid_data = _load_images(file_valid, size)

print(np.shape(train_data))
print(np.shape(valid_data))

### test data

In [None]:
dirname = './test/Anomaly/'
# Keep regular files only: os.listdir also returns sub-directories (for example a
# `.ipynb_checkpoints` folder), which Image.open cannot read.
filelist_anomaly = np.array([
    os.path.join(dirname, entry)
    for entry in os.listdir(dirname)
    if os.path.isfile(os.path.join(dirname, entry))
])

# Resize, then force 3-channel RGB so grayscale/palette/RGBA files all yield the same
# (size, size, 3) layout; values are scaled to [0, 1] as float32.
anomaly_images = []
for path in filelist_anomaly:
    # The context manager closes the file handle once the pixels are read.
    with Image.open(path) as picture:
        rgb = picture.resize((size, size)).convert('RGB')
    anomaly_images.append(np.array(rgb).astype('float32') / 255)
test_data_anomaly = np.array(anomaly_images)

print(np.shape(test_data_anomaly))

In [None]:
dirname = './test/Normal/'
# Keep regular files only: os.listdir also returns sub-directories (for example a
# `.ipynb_checkpoints` folder), which Image.open cannot read.
filelist_normal = np.array([
    os.path.join(dirname, entry)
    for entry in os.listdir(dirname)
    if os.path.isfile(os.path.join(dirname, entry))
])

# Resize, then force 3-channel RGB so grayscale/palette/RGBA files all yield the same
# (size, size, 3) layout; values are scaled to [0, 1] as float32.
normal_images = []
for path in filelist_normal:
    # The context manager closes the file handle once the pixels are read.
    with Image.open(path) as picture:
        rgb = picture.resize((size, size)).convert('RGB')
    normal_images.append(np.array(rgb).astype('float32') / 255)
test_data_normal = np.array(normal_images)

print(np.shape(test_data_normal))

## Save data

In [None]:
# Write each dataset next to the notebook; np.save appends the ".npy" suffix itself.
for target, dataset in (
    ('./x_train', train_data),
    ('./x_valid', valid_data),
    ('./x_test_anomaly', test_data_anomaly),
    ('./x_test_normal', test_data_normal),
):
    np.save(target, dataset)

## Upload data to S3

In [None]:
from sagemaker import get_execution_role
import sagemaker

# IAM role of this notebook instance; it is passed to the training job later on.
role = get_execution_role()

sagemaker_session = sagemaker.Session()

# Upload the train and validation arrays under one shared key prefix, in that order.
input_train, input_valid = (
    sagemaker_session.upload_data(path=npy_file, key_prefix='sagemaker/autoencoder-test')
    for npy_file in ('x_train.npy', 'x_valid.npy')
)

In [None]:
import os
# Both files were uploaded under the same key prefix, so the parent location of the
# train file's upload result (presumably an S3 URI — confirm) is what gets passed to
# the training job as its input.
input_dir = os.path.dirname(input_train)
# Bare expression so the notebook displays the value.
input_dir

## Training

In [None]:
from sagemaker.tensorflow import TensorFlow
# Configure the SageMaker TensorFlow training job that runs autoencoder_color.py.
# NOTE(review): `train_instance_count` / `train_instance_type` look like SageMaker Python
# SDK v1 argument names — confirm the installed SDK version still accepts them.
ae_estimator = TensorFlow(entry_point='autoencoder_color.py',
                             role=role,
                             train_instance_count=1,
                             train_instance_type='ml.p3.2xlarge',
#                             train_instance_type='local',
                             framework_version='2.1.0',
                             py_version='py3',
                             debugger_hook_config=False,
                             # Values handed to the entry-point script; the data file names
                             # match the .npy files uploaded in the previous section.
                             hyperparameters={'epoch':1000, 'size':size, 'train_data_name':'x_train.npy', 'valid_data_name':'x_valid.npy'},
                             # NOTE(review): parameter-server distribution is enabled although
                             # only one instance is requested — confirm this is intended.
                             distributions={'parameter_server': {'enabled': True}})

In [None]:
# Start the training job on the uploaded data. wait=False asks fit() not to block the
# notebook while the job runs.
# NOTE(review): the deploy cell below presumably needs the finished training job —
# confirm the job has completed before running it.
ae_estimator.fit( input_dir, wait=False)

## Deploy model

In [None]:
# Deploy the trained model to a new endpoint backed by a single ml.p2.xlarge instance and
# keep the returned predictor for the prediction cells below.
predictor = ae_estimator.deploy(initial_instance_count=1, instance_type='ml.p2.xlarge')

すでに起動済みのエンドポイントを使用する場合はこちらを実行する。情報が上書きされてしまうため、上記セルで新しいエンドポイントを起動した直後にこちらを実行しないこと。

In [None]:
from sagemaker.predictor import RealTimePredictor
from sagemaker.tensorflow.model import TensorFlowPredictor
from sagemaker.predictor import numpy_deserializer, npy_serializer
# Attach to an endpoint that is already running; replace <Endpoint name> with its name.
# This rebinds `predictor`, so a predictor returned by the deploy cell above is replaced.
predictor = TensorFlowPredictor('<Endpoint name>')

# Predict

## Anomaly data

In [None]:
n = 10  # number of test images sent to the endpoint
shape = np.shape(test_data_anomaly)
decoded_imgs_anomaly = []

for i in range(n):
    # Send one image at a time as a batch of one: (1, height, width, channels).
    tmp = test_data_anomaly[i].reshape((1,) + shape[1:4])
    predictions = predictor.predict(tmp)
    # Keep only the reconstruction of the single image in the batch.
    decoded_imgs_anomaly.append(predictions['predictions'][0])

## Normal data

In [None]:
shape = np.shape(test_data_normal)
decoded_imgs_normal = []

for i in range(n):
    # Send one image at a time as a batch of one: (1, height, width, channels).
    tmp = test_data_normal[i].reshape((1,) + shape[1:4])
    predictions = predictor.predict(tmp)
    # Keep only the reconstruction of the single image in the batch.
    decoded_imgs_normal.append(predictions['predictions'][0])

# Visualize results

## Define functions

結果表示用関数の定義

In [None]:
from scipy import signal
import matplotlib.pyplot as plt

def plot(image, output, threshold, filename):
    """Plot an image beside its reconstruction and count the anomalous pixels.

    Draws five panels (input, reconstruction, amplified difference, edge mask,
    input with anomalous pixels marked in red) and saves the figure as
    ``'res_' + filename``.

    Args:
        image: input image; assumed to be an H x W x 3 RGB array scaled to
            [0, 1] (it is multiplied by 255 and summed over axis 2).
        output: reconstructed image with the same layout as ``image``.
        threshold: a pixel counts as anomalous when its 5x5-summed
            reconstruction error exceeds this value.
        filename: name of the source image; its first 10 characters title the
            first panel and the full name is used for the saved figure.

    Returns:
        The number of pixels whose score exceeds ``threshold``.
    """
    output = np.array(output) * 255
    img = np.array(image) * 255

    # Clip before casting so an error above 255 cannot wrap around in uint8.
    diff = np.clip(np.abs(img - output), 0, 255).astype('uint8')
    channel_sum = diff.sum(axis=2)
    edge = detect_edge(img)

    # Sum the per-pixel error over a 5x5 neighbourhood.
    score = signal.convolve2d(channel_sum, np.ones((5, 5)), mode='same')

    # Post-processing: exclude edge regions. The mask is cast to float first because
    # multiplying a uint8 array by 1000 overflows (and raises under NumPy 2).
    penalty = np.asarray(edge, dtype=np.float64) * 1000
    score = np.clip(score - penalty, 0, None)

    rows, cols = np.where(score > threshold)

    fig, axes = plt.subplots(ncols=5, figsize=(16, 4))
    ax0, ax1, ax2, ax3, ax4 = axes
    for ax in axes:
        ax.set_axis_off()

    ax0.set_title(filename[:10])
    ax1.set_title('reconstructed image')
    ax2.set_title('diff ')
    ax3.set_title('mask')
    ax4.set_title('anomalies: ' + str(len(rows)))

    ax0.imshow(img.astype(int), interpolation='nearest')
    ax1.imshow(output.astype(int), interpolation='nearest')
    # Amplify the difference for visibility; widen the dtype before multiplying so
    # large errors saturate at 255 instead of wrapping around in uint8.
    amplified = np.clip(diff.astype(int) * 3, 0, 255)
    ax2.imshow(amplified, cmap=plt.cm.viridis, vmin=0, vmax=255, interpolation='nearest')
    ax3.imshow(edge.astype(int), interpolation='nearest', cmap='gray')
    ax4.imshow(img.astype(int), interpolation='nearest')

    # np.where returns (row, column) indices; scatter expects (x, y) = (column, row).
    ax4.scatter(cols, rows, color='red', s=0.1)

    # To show the result in the notebook instead, call plt.show() here.

    # Write the result to an image file.
    plt.savefig('res_' + filename, dpi=300)

    return len(rows)


後処理のエッジ抽出関数の定義

In [None]:
import cv2

def detect_edge(img, min_val=300, max_val=400, dilate_size=20):
    """Return a dilated Canny edge mask for an RGB image.

    Args:
        img: RGB image with values in the 0-255 range; it is cast to uint8
            before the grayscale conversion.
        min_val: lower hysteresis threshold passed to cv2.Canny.
        max_val: upper hysteresis threshold passed to cv2.Canny.
        dilate_size: side length of the square kernel used to thicken the
            detected edges.

    Returns:
        2-D uint8 array that is non-zero on and around the detected edges.
    """
    gray = cv2.cvtColor(np.array(img).astype('uint8'), cv2.COLOR_RGB2GRAY)

    # Only the two thresholds are passed: the fourth positional parameter of cv2.Canny
    # is the `edges` output array, not the Sobel aperture, so a size given there would
    # not configure the filter. The aperture stays at OpenCV's default.
    edges = cv2.Canny(gray, min_val, max_val)

    # Thicken the edges so their surroundings are masked out as well.
    kernel = np.ones((dilate_size, dilate_size), np.uint8)
    return cv2.dilate(edges, kernel, iterations=1)

## Predict anomaly images

In [None]:
# Threshold on the difference between the input image and the reconstructed image above
# which a pixel is treated as anomalous. The larger the value, the less readily a pixel
# is treated as anomalous.
threshold = 700

In [None]:
# Plot the first n anomaly images and record how many anomalous pixels each one has.
anomalies_anomaly = []
for i in range(n):
    anomalies = plot(
        test_data_anomaly[i],
        decoded_imgs_anomaly[i],
        threshold,
        os.path.basename(filelist_anomaly[i]),
    )
    anomalies_anomaly += [anomalies]

## Predict normal images

In [None]:
# Plot the first n normal images and record how many anomalous pixels each one has.
anomalies_normal = []
for i in range(n):
    anomalies = plot(
        test_data_normal[i],
        decoded_imgs_normal[i],
        threshold,
        os.path.basename(filelist_normal[i]),
    )
    anomalies_normal += [anomalies]

## Compare the value of anomalies

In [None]:
# Print the per-image anomalous-pixel counts and their mean for each group.
for label, counts in (
    ('Anomaly images: ', anomalies_anomaly),
    ('Normal images: ', anomalies_normal),
):
    print(label, counts, np.mean(counts))