In [15]:
import tensorflow as tf
from tensorflow.keras import Model
import numpy as np 
import pandas as pd
import os
from result_display import show_result,export_anomaly
from reject_anomalies import pred_baseon_threshold,make_use_reject_anomalies
from Feature_engineer import remove_unwanted_col_autoencoder, feature_engineer_steps
import tensorflow as tf
from sklearn.model_selection import train_test_split
import torch
from datetime import datetime
import matplotlib.pyplot as plt
from Data_preprocessing_method import apply_PCA
from AutoEncoder_util import convert_type, transform_data, mad_score

In [16]:
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.16.1


In [17]:
transactions_df = pd.read_csv("data/transactions_df.csv")
terminal_profiles_df = pd.read_csv("data/terminal_profiles_table.csv")
customer_profiles_df = pd.read_csv("data/customer_profiles_table.csv")
join_terminal = pd.merge(transactions_df, terminal_profiles_df, on='terminal_id', how='inner') 
join_customer = pd.merge(join_terminal, customer_profiles_df, on='customer_id', how='inner')
# 80% for training 20% for validation
train_size = int(len(join_customer)*0.8)
join_customer_train = join_customer.iloc[:train_size]

# only normal data for training
join_customer_normal =  join_customer_train[join_customer_train['fraud'] == 0].copy()

# obtain test x with normal and fraud data
train_X,train_y = feature_engineer_steps(join_customer)
train_X = remove_unwanted_col_autoencoder(train_X)
test_X = convert_type(train_X.iloc[train_size:])
test_y = convert_type(train_y.iloc[train_size:])

# obtain train data with only normal data
train_X_normal,train_y_normal = feature_engineer_steps(join_customer_normal)
train_X_normal = convert_type(remove_unwanted_col_autoencoder(train_X_normal))



In [18]:
#split training set
X_train, X_validate = train_test_split(train_X_normal, 
                                       test_size=0.2, 
                                       random_state=42)

# transform data with normalization and min max scale
X_train_transformed = transform_data(X_train)
X_validate_transformed = transform_data(X_validate)



In [19]:
#Autoencoder network
input_dim = X_train_transformed.shape[1]
BATCH_SIZE = 256
EPOCHS = 100
autoencoder = tf.keras.models.Sequential([
    
    # deconstruct / encode
    tf.keras.layers.Dense(input_dim, activation='relu', input_shape=(input_dim, )), 
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    
    # reconstruction / decode
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(input_dim, activation='relu')
    
])
autoencoder.compile(optimizer="adam", 
                    loss="mse",
                    metrics=["acc"])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [20]:
autoencoder.summary();

In [21]:
# define our early stopping
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.0001,
    patience=10,
    verbose=1, 
    mode='min',
    restore_best_weights=True
)

save_model = tf.keras.callbacks.ModelCheckpoint(
    filepath='saved_model/autoencoder_best_weights.keras',
    save_best_only=True,
    monitor='val_loss',
    verbose=1,
    mode='min'
)

# callbacks argument only takes a list
cb = [early_stop, save_model]

In [22]:
history = autoencoder.fit(
    X_train_transformed, X_train_transformed,
    shuffle=True,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=cb,
    validation_data=(X_validate_transformed, X_validate_transformed)
);

Epoch 1/100
[1m4283/4330[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 942us/step - acc: 0.4781 - loss: 0.0566
Epoch 1: val_loss improved from inf to 0.03700, saving model to saved_model/autoencoder_best_weights.keras
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1ms/step - acc: 0.4790 - loss: 0.0564 - val_acc: 0.5976 - val_loss: 0.0370
Epoch 2/100
[1m4312/4330[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - acc: 0.5741 - loss: 0.0368
Epoch 2: val_loss improved from 0.03700 to 0.03643, saving model to saved_model/autoencoder_best_weights.keras
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - acc: 0.5741 - loss: 0.0368 - val_acc: 0.6237 - val_loss: 0.0364
Epoch 3/100
[1m4292/4330[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 1ms/step - acc: 0.5756 - loss: 0.0363
Epoch 3: val_loss improved from 0.03643 to 0.03621, saving model to saved_model/autoencoder_best_weights.keras
[1m4330/4330[0m [32m━━━━━━

## Train model with PCA option

In [23]:
# apply PCA
X_train_transformed_PCA = apply_PCA(X_train_transformed,7)
X_validate_transformed_PCA = apply_PCA(X_validate_transformed,7)

In [24]:
#Autoencoder network
input_dim = X_train_transformed_PCA.shape[1]
BATCH_SIZE = 256
EPOCHS = 100
autoencoder = tf.keras.models.Sequential([
    
    # deconstruct / encode
    tf.keras.layers.Dense(input_dim, activation='relu', input_shape=(input_dim, )), 
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(8, activation='relu'),
    
    # reconstruction / decode
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(input_dim, activation='relu')
    
])
autoencoder.compile(optimizer="adam", 
                    loss="mse",
                    metrics=["acc"])
#autoencoder.summary();

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [25]:
save_model_PCA = tf.keras.callbacks.ModelCheckpoint(
    filepath='saved_model/autoencoder_best_weights_PCA.keras',
    save_best_only=True,
    monitor='val_loss',
    verbose=1,
    mode='min'
)

# callbacks argument only takes a list
cb_PCA = [early_stop, save_model_PCA]

In [26]:
history = autoencoder.fit(
    X_train_transformed_PCA, X_train_transformed_PCA,
    shuffle=True,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=cb_PCA,
    validation_data=(X_validate_transformed_PCA, X_validate_transformed_PCA)
);

Epoch 1/100
[1m4327/4330[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 946us/step - acc: 0.5424 - loss: 0.1706
Epoch 1: val_loss improved from inf to 0.16917, saving model to saved_model/autoencoder_best_weights_PCA.keras
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1ms/step - acc: 0.5425 - loss: 0.1706 - val_acc: 0.4252 - val_loss: 0.1692
Epoch 2/100
[1m4291/4330[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 965us/step - acc: 0.6507 - loss: 0.1549
Epoch 2: val_loss improved from 0.16917 to 0.16903, saving model to saved_model/autoencoder_best_weights_PCA.keras
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - acc: 0.6507 - loss: 0.1549 - val_acc: 0.4162 - val_loss: 0.1690
Epoch 3/100
[1m4312/4330[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 972us/step - acc: 0.6519 - loss: 0.1548
Epoch 3: val_loss did not improve from 0.16903
[1m4330/4330[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - ac