In [47]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, Model
import matplotlib.pyplot as plt 
import seaborn as sns
import os
import glob
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

In [48]:
'''
        Each file contains nine columns and a different number of rows depending on the test length.
        
        1st column is the acceleration data in the X axis measured by the sensor ADXL345.
        2nd column is the acceleration data in the Y axis measured by the sensor ADXL345.
        3rd column is the acceleration data in the Z axis measured by the sensor ADXL345.

        4th column is the rotation data in the X axis measured by the sensor ITG3200.
        5th column is the rotation data in the Y axis measured by the sensor ITG3200.
        6th column is the rotation data in the Z axis measured by the sensor ITG3200.

        7th column is the acceleration data in the X axis measured by the sensor MMA8451Q.
        8th column is the acceleration data in the Y axis measured by the sensor MMA8451Q.
        9th column is the acceleration data in the Z axis measured by the sensor MMA8451Q.
        
        Data are in bits with the following characteristics:

        ADXL345:
        Resolution: 13 bits
        Range: +-16g

        ITG3200
        Resolution: 16 bits
        Range: +-2000°/s

        MMA8451Q:
        Resolution: 14 bits
        Range: +-8g
'''

'\n        Each file contains nine columns and a different number of rows depending on the test length.\n        \n        1st column is the acceleration data in the X axis measured by the sensor ADXL345.\n        2nd column is the acceleration data in the Y axis measured by the sensor ADXL345.\n        3rd column is the acceleration data in the Z axis measured by the sensor ADXL345.\n\n        4th column is the rotation data in the X axis measured by the sensor ITG3200.\n        5th column is the rotation data in the Y axis measured by the sensor ITG3200.\n        6th column is the rotation data in the Z axis measured by the sensor ITG3200.\n\n        7th column is the acceleration data in the X axis measured by the sensor MMA8451Q.\n        8th column is the acceleration data in the Y axis measured by the sensor MMA8451Q.\n        9th column is the acceleration data in the Z axis measured by the sensor MMA8451Q.\n        \n        Data are in bits with the following characteristics:\

In [49]:
datapath = "./data/SisFall.csv"

df = pd.read_csv(datapath, index_col=0)
df.head(10)

Unnamed: 0,ADXL_x,ADXL_y,ADXL_z,ITG_x,ITG_y,ITG_z,MMA_x,MMA_y,MMA_z,subject,activity,trial
,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,SA01,D01,R01
0.0,17.0,-179.0,-99.0,-18.0,-504.0,-352.0,76.0,-697.0,-279.0,SA01,D01,R01
1.0,15.0,-174.0,-90.0,-53.0,-568.0,-306.0,48.0,-675.0,-254.0,SA01,D01,R01
2.0,1.0,-176.0,-81.0,-84.0,-613.0,-271.0,-2.0,-668.0,-221.0,SA01,D01,R01
3.0,-10.0,-180.0,-77.0,-104.0,-647.0,-227.0,-34.0,-697.0,-175.0,SA01,D01,R01
4.0,-21.0,-191.0,-63.0,-128.0,-675.0,-191.0,-74.0,-741.0,-133.0,SA01,D01,R01
5.0,-37.0,-225.0,-59.0,-146.0,-700.0,-159.0,-110.0,-840.0,-103.0,SA01,D01,R01
6.0,-36.0,-243.0,-46.0,-166.0,-722.0,-131.0,-147.0,-948.0,-75.0,SA01,D01,R01
7.0,-44.0,-271.0,-38.0,-190.0,-738.0,-107.0,-171.0,-1073.0,-38.0,SA01,D01,R01
8.0,-51.0,-312.0,-33.0,-210.0,-752.0,-90.0,-200.0,-1191.0,3.0,SA01,D01,R01


In [50]:
print (df.columns)
print (set(df['subject']))
features = df.copy()

dnum = 200
fnum = 365

Index(['ADXL_x', 'ADXL_y', 'ADXL_z', 'ITG_x', 'ITG_y', 'ITG_z', 'MMA_x',
       'MMA_y', 'MMA_z', 'subject', 'activity', 'trial'],
      dtype='object')
{'SE04', 'SA08', 'SA07', 'SA13', 'SE10', 'SA10', 'SA17', 'SA19', 'SE01', 'SA06', 'SA05', 'SA04', 'SA14', 'SE15', 'SA09', 'SA11', 'SA20', 'SE14', 'SE13', 'SE05', 'SA22', 'SA21', 'SA03', 'SE02', 'SA16', 'SE08', 'SE06', 'SA23', 'SA01', 'SA12', 'SE09', 'SE11', 'SA02', 'SA15', 'SA18', 'SE03', 'SE07', 'SE12'}


In [51]:
D01 = features[features['activity']=='D01'].head(dnum).copy()
D02 = features[features['activity']=='D02'].head(dnum).copy()
D03 = features[features['activity']=='D03'].head(dnum).copy()
D04 = features[features['activity']=='D04'].head(dnum).copy()
D05 = features[features['activity']=='D05'].head(dnum).copy()
D06 = features[features['activity']=='D06'].head(dnum).copy()
D07 = features[features['activity']=='D07'].head(dnum).copy()
D08 = features[features['activity']=='D08'].head(dnum).copy()
D09 = features[features['activity']=='D09'].head(dnum).copy()
D10 = features[features['activity']=='D10'].head(dnum).copy()
D11 = features[features['activity']=='D11'].head(dnum).copy()
D12 = features[features['activity']=='D12'].head(dnum).copy()
D13 = features[features['activity']=='D13'].head(dnum).copy()
D14 = features[features['activity']=='D14'].head(dnum).copy()
D15 = features[features['activity']=='D15'].head(dnum).copy()
D16 = features[features['activity']=='D16'].head(dnum).copy()
D17 = features[features['activity']=='D17'].head(dnum).copy()
D18 = features[features['activity']=='D18'].head(dnum).copy()
D19 = features[features['activity']=='D19'].head(dnum).copy()

In [52]:
F01 = features[features['activity']=='F01'].head(fnum).copy()
F02 = features[features['activity']=='F02'].head(fnum).copy()
F03 = features[features['activity']=='F03'].head(fnum).copy()
F04 = features[features['activity']=='F04'].head(fnum).copy()
F05 = features[features['activity']=='F05'].head(fnum).copy()
F06 = features[features['activity']=='F06'].head(fnum).copy()
F07 = features[features['activity']=='F07'].head(fnum).copy()
F08 = features[features['activity']=='F08'].head(fnum).copy()
F09 = features[features['activity']=='F09'].head(fnum).copy()
F10 = features[features['activity']=='F10'].head(fnum).copy()
F11 = features[features['activity']=='F11'].head(fnum).copy()
F12 = features[features['activity']=='F12'].head(fnum).copy()
F13 = features[features['activity']=='F13'].head(fnum).copy()
F14 = features[features['activity']=='F14'].head(fnum).copy()
F15 = features[features['activity']=='F15'].head(fnum).copy()

In [53]:
df = pd.DataFrame()
df = pd.concat([df, F01, F02, F03, F04, F05, F06, F07, F08, F09, F10, F11, F12, F13, F14, F15])
df = pd.concat([df, D01, D02, D03, D04, D05, D06, D07, D08, D09, D10, D11, D15, D16, D17, D18, D19])

In [54]:
df['activity'].value_counts()

activity
F01    365
F09    365
F02    365
F15    365
F14    365
F13    365
F11    365
F10    365
F12    365
F08    365
F07    365
F06    365
F05    365
F04    365
F03    365
D09    200
D18    200
D17    200
D16    200
D15    200
D11    200
D10    200
D01    200
D08    200
D07    200
D06    200
D05    200
D04    200
D03    200
D02    200
D19    200
Name: count, dtype: int64

In [55]:
features = df.copy()

In [56]:
features = features.replace({'activity':'D01'},0)
features = features.replace({'activity':'D01'},0)
features = features.replace({'activity':'D02'},0)
features = features.replace({'activity':'D03'},0)
features = features.replace({'activity':'D04'},0)
features = features.replace({'activity':'D05'},0)
features = features.replace({'activity':'D06'},0)
features = features.replace({'activity':'D07'},0)
features = features.replace({'activity':'D08'},0)
features = features.replace({'activity':'D09'},0)
features = features.replace({'activity':'D10'},0)
features = features.replace({'activity':'D11'},0)
features = features.replace({'activity':'D12'},0)
features = features.replace({'activity':'D13'},0)
features = features.replace({'activity':'D14'},0)
features = features.replace({'activity':'D15'},0)
features = features.replace({'activity':'D16'},0)
features = features.replace({'activity':'D17'},0)
features = features.replace({'activity':'D18'},0)
features = features.replace({'activity':'D19'},0)
features = features.replace({'activity':'F01'},1)
features = features.replace({'activity':'F02'},1)
features = features.replace({'activity':'F03'},1)
features = features.replace({'activity':'F04'},1)
features = features.replace({'activity':'F05'},1)
features = features.replace({'activity':'F06'},1)
features = features.replace({'activity':'F07'},1)
features = features.replace({'activity':'F08'},1)
features = features.replace({'activity':'F09'},1)
features = features.replace({'activity':'F10'},1)
features = features.replace({'activity':'F11'},1)
features = features.replace({'activity':'F12'},1)
features = features.replace({'activity':'F13'},1)
features = features.replace({'activity':'F14'},1)
features = features.replace({'activity':'F15'},1)
features = features.replace({'activity':'F15'},1)

  features = features.replace({'activity':'F15'},1)


In [57]:
features['activity'].value_counts()

activity
1    5475
0    3200
Name: count, dtype: int64

In [58]:
features

Unnamed: 0,ADXL_x,ADXL_y,ADXL_z,ITG_x,ITG_y,ITG_z,MMA_x,MMA_y,MMA_z,subject,activity,trial
,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,SA01,1,R01
0.0,-9.0,-257.0,-25.0,84.0,247.0,27.0,-120.0,-987.0,63.0,SA01,1,R01
1.0,-3.0,-263.0,-23.0,99.0,258.0,35.0,-110.0,-1016.0,68.0,SA01,1,R01
2.0,-1.0,-270.0,-22.0,114.0,272.0,45.0,-94.0,-1037.0,69.0,SA01,1,R01
3.0,1.0,-277.0,-24.0,127.0,286.0,57.0,-81.0,-1062.0,69.0,SA01,1,R01
...,...,...,...,...,...,...,...,...,...,...,...,...
194.0,4.0,-259.0,-21.0,-56.0,2.0,-5.0,18.0,-1008.0,35.0,SA01,0,R01
195.0,9.0,-261.0,-27.0,-59.0,2.0,-5.0,23.0,-1014.0,35.0,SA01,0,R01
196.0,4.0,-261.0,-25.0,-62.0,3.0,-5.0,20.0,-1011.0,33.0,SA01,0,R01
197.0,4.0,-262.0,-21.0,-65.0,3.0,-4.0,18.0,-1009.0,36.0,SA01,0,R01


In [59]:
print (set(features['subject']))

{'SA01'}


In [60]:
features = features[[ft for ft in features.columns if ft not in ['subject', 'trial']]]
print(features.columns)

Index(['ADXL_x', 'ADXL_y', 'ADXL_z', 'ITG_x', 'ITG_y', 'ITG_z', 'MMA_x',
       'MMA_y', 'MMA_z', 'activity'],
      dtype='object')


In [61]:
from sklearn.preprocessing import StandardScaler

feature_columns = [col for col in features.columns if col != 'activity']

scaler = StandardScaler()

features_scaled = features.copy()
features_scaled[feature_columns] = scaler.fit_transform(features[feature_columns])

In [62]:
adl_count = features_scaled['activity'].value_counts()[0]
fall_count = features_scaled['activity'].value_counts()[1]

adl_mask = features_scaled['activity'] == 0
adl_data = features_scaled[adl_mask]
adl1 = adl_data.iloc[:int(adl_count * 0.6)]
adl2 = adl_data.iloc[int(adl_count * 0.6):]

fall_mask = features_scaled['activity'] == 1
fall_data = features_scaled[fall_mask]
fall1 = fall_data.iloc[:int(fall_count * 0.6)]
fall2 = fall_data.iloc[int(fall_count * 0.6):]

print(adl1.shape, adl2.shape, fall1.shape, fall2.shape)

(1920, 10) (1280, 10) (3285, 10) (2190, 10)


In [63]:
def split_sequences(sequences, n_steps):
    X, y = [], []
    for i in range(len(sequences) - n_steps + 1):
        end_ix = i + n_steps
        
        seq_x = sequences.iloc[i:end_ix].iloc[:, :-1] 
        seq_y = sequences.iloc[end_ix-1].iloc[-1]     
        
        X.append(seq_x.values)  
        y.append(seq_y)
    
    return np.array(X), np.array(y)

In [64]:
WINDOW_LENGTH = 20
train_data, train_labels = split_sequences(adl1, WINDOW_LENGTH)
test_data, test_labels = split_sequences(pd.concat([adl2, fall2]), WINDOW_LENGTH)
print (f"after : {train_data.shape}, {test_data.shape}")

after : (1901, 20, 9), (3451, 20, 9)


-----

In [65]:
# Input:      (256, 9)
# Level 1:    (128, 64)   # spatial ↓, channels ↑
# Level 2:    (64, 128)   # spatial ↓, channels ↑
# Level 3:    (32, 256)   # spatial ↓, channels ↑
# Level 4:    (16, 512)   # spatial ↓, channels ↑
#     (16, 1024)
# Level 4:    (32, 512)   # spatial ↑, channels ↓
# Level 3:    (64, 256)   # spatial ↑, channels ↓
# Level 2:    (128, 128)  # spatial ↑, channels ↓
# Level 1:    (256, 64)   # spatial ↑, channels ↓
# Output:     (256, 9)

In [66]:
class TimeEmbedding(tf.keras.layers.Layer):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim
        
    def call(self, time):
        half_dim = self.dim // 2
        embeddings = tf.math.log(10000.) / (half_dim - 1)
        embeddings = tf.exp(tf.range(half_dim, dtype=tf.float32) * -embeddings)
        embeddings = tf.cast(time, tf.float32)[:, None] * embeddings[None, :]
        embeddings = tf.concat([tf.sin(embeddings), tf.cos(embeddings)], axis=-1)
        return embeddings

In [67]:
"""Double convolution block for U-Net"""
class DoubleConv(tf.keras.layers.Layer):
    def __init__(self, filters):
        super(DoubleConv, self).__init__()
        # 더 넓은 커널과 더 많은 필터 사용
        self.conv1 = tf.keras.layers.Conv1D(filters, 5, padding='same')  # kernel size 3 -> 5
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv1D(filters, 5, padding='same')
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv3 = tf.keras.layers.Conv1D(filters, 1, padding='same')  # 추가 1x1 conv
        self.bn3 = tf.keras.layers.BatchNormalization()
        self.relu = tf.keras.layers.ReLU()
        self.dropout = tf.keras.layers.Dropout(0.2)  # dropout 증가
        
        # 시간 정보를 처리하기 위한 더 큰 projection
        self.time_proj = tf.keras.layers.Dense(filters)
    
    def call(self, inputs):
        x, t = inputs
        
        # First conv block
        h = self.conv1(x)
        h = self.bn1(h)
        
        # 시간 정보 추가
        time_emb = self.time_proj(t)[:, None, :]
        h = h + time_emb
        
        h = self.relu(h)
        h = self.dropout(h)
        
        # Second conv block
        h = self.conv2(h)
        h = self.bn2(h)
        h = self.relu(h)
        h = self.dropout(h)
        
        # Additional 1x1 conv
        h = self.conv3(h)
        h = self.bn3(h)
        h = self.relu(h)
        h = self.dropout(h)
        
        return h

In [73]:
class UNetDiffusion(tf.keras.Model):
    def __init__(self, time_dim=512):  # time embedding dimension 증가
        super(UNetDiffusion, self).__init__()
        
        # Time embedding with larger dimensions
        self.time_mlp = tf.keras.Sequential([
            TimeEmbedding(time_dim),
            tf.keras.layers.Dense(time_dim * 2),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Dense(time_dim * 2)  # 더 큰 차원으로 변경
        ])
        
        # Encoder layers with more channels
        self.enc1_conv = DoubleConv(64)  # 8 -> 64
        self.enc1_pool = tf.keras.layers.MaxPooling1D(2)
        
        self.enc2_conv = DoubleConv(128)  # 16 -> 128
        self.enc2_pool = tf.keras.layers.MaxPooling1D(2)
        
        self.enc3_conv = DoubleConv(256)  # 새로운 레이어 추가
        self.enc3_pool = tf.keras.layers.MaxPooling1D(2)
        
        # Bottleneck with more channels
        self.bottleneck = DoubleConv(512)  # 32 -> 512
        
        # Decoder layers with more channels
        self.dec3_up = tf.keras.layers.UpSampling1D(2)
        self.dec3_conv = DoubleConv(256)
        
        self.dec2_up = tf.keras.layers.UpSampling1D(2)
        self.dec2_conv = DoubleConv(128)
        
        self.dec1_up = tf.keras.layers.UpSampling1D(2)
        self.dec1_conv = DoubleConv(64)
        
        self.final = tf.keras.layers.Conv1D(9, 1, padding='same')
        
    def call(self, x, t, training=None):
        # Time embedding
        t = self.time_mlp(t)
        
        # Encoder path with skip connections
        enc1_before_pool = x
        x = self.enc1_conv([x, t])
        enc1_output = self.enc1_pool(x)
        
        enc2_before_pool = enc1_output
        x = self.enc2_conv([enc1_output, t])
        enc2_output = self.enc2_pool(x)
        
        enc3_before_pool = enc2_output
        x = self.enc3_conv([enc2_output, t])
        enc3_output = self.enc3_pool(x)
        
        # Bottleneck
        x = self.bottleneck([enc3_output, t])
        
        # Decoder path with skip connections
        x = self.dec3_up(x)
        x = self.dec3_conv([x, t])
        x = tf.concat([enc3_before_pool, x], axis=-1)
        
        x = self.dec2_up(x)
        x = self.dec2_conv([x, t])
        x = tf.concat([enc2_before_pool, x], axis=-1)
        
        x = self.dec1_up(x)
        x = self.dec1_conv([x, t])
        x = tf.concat([enc1_before_pool, x], axis=-1)
        
        return self.final(x)

In [69]:
def linear_beta_schedule(timesteps):
    beta_start = 0.0001
    beta_end = 0.02
    return np.linspace(beta_start, beta_end, timesteps)

def get_index_from_list(vals, t, x_shape):
    batch_size = t.shape[0]
    out = tf.gather(vals, t)
    # reshape to match input dimensions [batch_size, sequence_length, features]
    return tf.reshape(out, [batch_size, 1, 1])

def forward_diffusion(x_0, t):
    # Get actual batch size from input
    actual_batch_size = tf.shape(x_0)[0]
    
    # 데이터 타입을 일치시킵니다
    x_0 = tf.cast(x_0, tf.float32)
    noise = tf.random.normal(shape=tf.shape(x_0), dtype=tf.float32)
    
    # t 텐서의 크기를 실제 배치 크기에 맞춥니다
    t = t[:actual_batch_size]
    
    # sqrt_alphas_cumprod와 sqrt_one_minus_alphas_cumprod의 데이터 타입을 확인하고 맞춰줍니다
    sqrt_alphas_cumprod_t = tf.cast(get_index_from_list(sqrt_alphas_cumprod, t, x_0.shape), tf.float32)
    sqrt_one_minus_alphas_cumprod_t = tf.cast(get_index_from_list(sqrt_one_minus_alphas_cumprod, t, x_0.shape), tf.float32)
    
    # Broadcast the coefficients to match x_0's shape
    sqrt_alphas_cumprod_t = tf.broadcast_to(sqrt_alphas_cumprod_t, tf.shape(x_0))
    sqrt_one_minus_alphas_cumprod_t = tf.broadcast_to(sqrt_one_minus_alphas_cumprod_t, tf.shape(x_0))
    
    return (
        sqrt_alphas_cumprod_t * x_0 + sqrt_one_minus_alphas_cumprod_t * noise,
        noise
    )

timesteps = 1000
betas = linear_beta_schedule(timesteps)
betas = tf.cast(betas, tf.float32)
alphas = 1. - betas
alphas_cumprod = tf.math.cumprod(alphas)
sqrt_alphas_cumprod = tf.sqrt(alphas_cumprod)
sqrt_one_minus_alphas_cumprod = tf.sqrt(1. - alphas_cumprod)

In [70]:
BATCH_SIZE = 64
EPOCHS = 50
INITIAL_LEARNING_RATE = 0.005
DECAY_STEPS = 500
DECAY_RATE = 0.95

In [71]:
model = UNetDiffusion()
optimizer = tf.keras.optimizers.Adam(
    tf.keras.optimizers.schedules.ExponentialDecay(
        INITIAL_LEARNING_RATE, DECAY_STEPS, DECAY_RATE
    )
)

In [74]:
from tqdm.auto import tqdm

train_dataset = tf.data.Dataset.from_tensor_slices(train_data.astype('float32'))
train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=True)  # drop_remainder=True 추가

for epoch in range(EPOCHS):
    train_loss = 0
    n_batches = len(train_data) // BATCH_SIZE
    
    pbar = tqdm(enumerate(train_dataset), 
                desc=f'Epoch {epoch+1}/{EPOCHS}', 
                total=n_batches,
                leave=False)
    
    for i, batch_data in pbar:
        # 랜덤 타임스텝 샘플링
        t = tf.random.uniform(
            shape=(tf.shape(batch_data)[0],),  # 실제 배치 크기 사용
            minval=0, 
            maxval=timesteps, 
            dtype=tf.int32
        )
        
        with tf.GradientTape() as tape:
            x_noisy, noise = forward_diffusion(batch_data, t)
            pred_noise = model(x_noisy, t, training=True)
            loss = tf.reduce_mean(tf.square(noise - pred_noise))
        
        grads = tape.gradient(loss, model.trainable_variables)
        grads, _ = tf.clip_by_global_norm(grads, clip_norm=1.0)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        
        train_loss += loss.numpy()
        pbar.set_postfix({'Loss': f'{loss.numpy():.4f}'})
    
    avg_loss = train_loss / n_batches
    print(f'Epoch {epoch+1}/{EPOCHS} - Loss: {avg_loss:.4f}')

Epoch 1/50:   0%|          | 0/29 [00:00<?, ?it/s]

2025-02-10 00:59:54.644476: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: Incompatible shapes: [5,512,256] vs. [64,1,256]


InvalidArgumentError: Exception encountered when calling DoubleConv.call().

[1m{{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:CPU:0}} Incompatible shapes: [5,512,256] vs. [64,1,256] [Op:AddV2] name: [0m

Arguments received by DoubleConv.call():
  • inputs=['tf.Tensor(shape=(5, 512, 512), dtype=float32)', 'tf.Tensor(shape=(64, 1024), dtype=float32)']

In [None]:
def flatten(X):
    flattened_X = np.empty((X.shape[0], X.shape[2]))  # sample x features array.
    for i in range(X.shape[0]):
        flattened_X[i] = X[i, (X.shape[1]-1), :]
    return(flattened_X)

In [None]:
from sklearn.metrics import mean_squared_error

# 테스트용 시간 스텝 생성 (마지막 타임스텝 사용)
test_t = tf.fill([len(test_data)], timesteps-1)

# 예측 수행
valid_x_predictions = model(test_data, test_t)
error = flatten(test_data) - flatten(valid_x_predictions)

valid_mse = np.mean(np.power(flatten(test_data) - flatten(valid_x_predictions), 2), axis=1)

error_df = pd.DataFrame({
    'reconstruction_error': valid_mse,
    'true_class': test_labels
})

error_df

In [None]:
# 학습 데이터 예측
train_t = tf.fill([len(train_data)], timesteps-1)
X_pred_train = model(train_data, train_t)
scored_train = np.mean(np.abs(flatten(train_data) - flatten(X_pred_train)), axis=1)

# 테스트 데이터 예측
test_t = tf.fill([len(test_data)], timesteps-1)
X_pred_test = model(test_data, test_t)
scored_test = np.mean(np.abs(flatten(test_data) - flatten(X_pred_test)), axis=1)

# 시각화
plt.title('MSE compare') 
plt.hist(scored_train, bins=10, color='blue', histtype='step', label='normal') 
plt.hist(scored_test, bins=10, color='red', histtype='step', label='abnormal') 
plt.xlabel('MSE')
plt.legend()
plt.show()

In [None]:
error_df['true_class'].value_counts()

In [None]:
plt.figure(figsize=(10, 6))
sns.boxplot(x='true_class', y='reconstruction_error', data=error_df, showfliers=False, saturation=1)
plt.ylabel('Distribution')

plt.axhline(y= 0.1, xmin=0.01, xmax=1,dashes=(5,5), c='g')

plt.xticks(rotation=90)
plt.show()

In [None]:
threshold = 0.005

In [None]:
groups = error_df.groupby('true_class')
fig, ax = plt.subplots()

for name, group in groups:
    ax.plot(group.index, group.reconstruction_error, marker='^', ms=3.5, linestyle='',
            label= "Normal" if name == 0 else "Fall")
ax.hlines(threshold, ax.get_xlim()[0], ax.get_xlim()[1], colors="r", zorder=100, label='Threshold')
ax.legend()
plt.title("Reconstruction error for different classes")
plt.ylabel("Reconstruction error")
plt.xlabel("Data point index")
plt.show();

In [None]:
LABELS = ["ADL", "FALL"]

plt.figure(figsize=(7, 7))
y_pred = [1 if e > threshold else 0 for e in error_df.reconstruction_error.values]
conf_matrix = confusion_matrix(error_df.true_class, y_pred)
sns.heatmap(conf_matrix, xticklabels=LABELS, yticklabels=LABELS, annot=True, fmt="d");
plt.title("Confusion matrix")
plt.ylabel('True class')
plt.xlabel('Predicted class')
plt.show()