In [60]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
from imageio import imread
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
# from tqdm.notebook import tqdm


In [61]:
def read_grayscale_pngs(path, width=20, height=13):
    """Load every PNG found under ``path`` into a single array.

    Files are discovered recursively and ordered by the integer value of
    their file stem (``0.png``, ``1.png``, ...), so every stem must parse
    as an ``int``; otherwise ``int()`` raises ``ValueError``.

    Args:
        path: Directory to search (``str`` or ``Path``).
        width: Expected image width in pixels.
        height: Expected image height in pixels.

    Returns:
        An array of shape ``(n_images, height, width)`` (NumPy's default
        float dtype) holding the first channel of each image, or ``None``
        after printing a message when ``path`` does not exist.
    """
    path = Path(path)
    if not path.exists():
        print("Path doesn't exist")
        return None

    # Glob once so the file count and the iteration can never disagree.
    image_paths = sorted(path.glob('**/*.png'), key=lambda f: int(f.stem))

    images = np.empty((len(image_paths), height, width))

    for i, image_path in enumerate(image_paths):
        pixels = np.asarray(imread(image_path))
        if pixels.ndim == 3:
            # Multi-channel file: the image is grayscale, so the first
            # (red) channel carries all the information.
            pixels = pixels[:, :, 0]
        # A single-channel PNG decodes to a 2-D array and is used as-is.
        images[i] = pixels
    return images

In [62]:
# Images used to build the training set, one array per class.
legal, illegal = (
    read_grayscale_pngs("../data_processing/out/legal/orig"),
    read_grayscale_pngs("../data_processing/out/illegal/orig"),
)

# Images used to build the test set (paths relative to the working directory).
legal_test, illegal_test = (
    read_grayscale_pngs("testing/legal"),
    read_grayscale_pngs("testing/illegal"),
)


In [63]:
# Fail fast with a clear message if any image directory was missing
# (read_grayscale_pngs returns None in that case).
_missing = [
    name
    for name, arr in (
        ("legal", legal),
        ("illegal", illegal),
        ("legal_test", legal_test),
        ("illegal_test", illegal_test),
    )
    if arr is None
]
if _missing:
    raise FileNotFoundError(f"Image directory not found for: {', '.join(_missing)}")

# Label convention: 0 = legal, 1 = illegal.
X_train = np.concatenate((legal, illegal))
Y_train = np.concatenate((np.full(len(legal), 0), np.full(len(illegal), 1)))

X_test = np.concatenate((legal_test, illegal_test))
Y_test = np.concatenate((np.full(len(legal_test), 0), np.full(len(illegal_test), 1)))

# Hold out 10% of the training images for validation; the fixed
# random_state makes the split reproducible.
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.10, random_state=42)

In [64]:
# Wrap the training split in a tf.data pipeline: shuffle once with a fixed
# seed (no reshuffling between epochs), then group into mini-batches of 10.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, Y_train))
    .shuffle(buffer_size=len(X_train), seed=133742, reshuffle_each_iteration=False)
    .batch(10)
)


In [79]:
# Recurrent (LSTM) model: each 13x20 image is reshaped into a 260-step
# sequence with one feature per step before being fed to the LSTM.
keras.backend.clear_session()
modelr = keras.Sequential([
    layers.Reshape((260, 1), input_shape=(13, 20)),
    layers.LSTM(20),
    layers.Dense(4, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
])

modelr.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
# train_dataset is already shuffled and batched.
modelr.fit(
    train_dataset,
    shuffle=False,
    epochs=16,
    validation_data=(X_val, Y_val),
)

# Score on the held-out test set; shown as (loss, accuracy).
loss, accuracy = modelr.evaluate(X_test, Y_test, verbose=0)

loss, accuracy

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


(0.6295722723007202, 0.7674418687820435)

In [None]:
# Stop training once validation loss has gone 2 epochs without improving.
# NOTE(review): `es` is not passed to any fit() call visible in this
# notebook; add `callbacks=[es]` to a fit() call for it to take effect.
es = EarlyStopping(monitor='val_loss', mode='min', patience=2)

In [None]:
# Add an explicit single-channel axis and cast to float32.
# NOTE(review): train_dataset and X_val were created before this cell, so
# only later direct uses of X_train / X_test see the new shape and dtype.
channel_shape = (13, 20, 1)
X_train = X_train.reshape((len(X_train), *channel_shape)).astype('float32')
X_test = X_test.reshape((len(X_test), *channel_shape)).astype('float32')

In [None]:
# Display the shapes of the training and test arrays as a sanity check.
(X_train.shape, X_test.shape)

In [None]:
# Convolutional model: one 2x2 Conv2D filter over the 13x20 image (given a
# channel axis by the Reshape layer), followed by a small dense head.
keras.backend.clear_session()
modelc = keras.Sequential()

modelc.add(layers.Reshape((13, 20, 1), input_shape=(13, 20)))
# input_shape is only meaningful on the first layer, so it is not repeated here.
modelc.add(layers.Conv2D(1, 2, activation="relu"))
modelc.add(layers.Flatten())
modelc.add(layers.Dense(10, activation="relu"))
modelc.add(layers.Dense(1, activation="sigmoid"))

modelc.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
modelc.fit(train_dataset, shuffle=False, epochs=16, validation_data=(X_val, Y_val))

# Save weights only after training so the file holds the fitted model;
# restore later with modelc.load_weights("modelc.h5").
modelc.save_weights("modelc.h5")

# Evaluate the model defined in this cell (`modelc`). X_test may already carry
# a trailing channel axis from the earlier reshape cell, so present it in the
# (13, 20) layout the model's input layer declares.
loss, accuracy = modelc.evaluate(X_test.reshape(len(X_test), 13, 20), Y_test, verbose=0)

loss, accuracy