In [20]:
import numpy as np
from pathlib import Path
from imageio import imread
import pandas as pd

from sklearn.svm import SVC
import marginal

from tensorflow import keras
from tensorflow.keras import layers

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

%load_ext memory_profiler

In [21]:
def read_grayscale_pngs(path, width=20, height=13):
    """Load every PNG found under ``path`` into one stack of grayscale frames.

    Files are discovered recursively (``**/*.png``) and ordered by the integer
    value of their file stem, so ``2.png`` comes before ``10.png``.

    Parameters
    ----------
    path : str, pathlib.Path or None
        Root directory to search. ``None`` is passed straight through.
    width, height : int
        Expected image size in pixels. Every image must decode to
        ``(height, width)`` or ``(height, width, channels)``.

    Returns
    -------
    numpy.ndarray or None
        Array of shape ``(num_files, height, width)`` allocated with
        ``np.empty`` (NumPy's default float dtype), or ``None`` when ``path``
        is ``None`` or no PNG file is found.

    Raises
    ------
    ValueError
        If ``path`` does not exist, or if a PNG's stem is not an integer
        (raised by ``int()`` while sorting).
    """
    # Check for None *before* wrapping: Path(None) raises TypeError, which
    # previously made this guard unreachable.
    if path is None:
        return None
    path = Path(path)

    if not path.exists():
        raise ValueError("Path {} doesn't exist".format(path))

    # Walk the directory tree once; the same sorted list provides both the
    # count used for allocation and the iteration order.
    image_paths = sorted(path.glob('**/*.png'), key=lambda f: int(f.stem))
    if not image_paths:
        print("Path {} doesn't contain any images".format(path))
        return None

    # Honour the width/height arguments instead of a hard-coded (13, 20).
    images = np.empty((len(image_paths), height, width))

    for i, image_path in enumerate(image_paths):
        pixels = np.asarray(imread(image_path))
        if pixels.ndim == 3:
            # Multi-channel decode: the image is grayscale, so the first
            # (red) channel carries the full intensity information.
            pixels = pixels[:, :, 0]
        images[i] = pixels

    return images

In [22]:
# Training pool: the original frames of each class plus the augmented copies
# stored in sibling directories. Note that only the illegal class has a
# "shifted" variant.
legal = np.concatenate([
    read_grayscale_pngs(directory)
    for directory in (
        "out/legal/orig",
        "out/legal/mirrored",
        "out/legal/rotated5.0",
        "out/legal/rotated-5.0",
    )
])
illegal = np.concatenate([
    read_grayscale_pngs(directory)
    for directory in (
        "out/illegal/orig",
        "out/illegal/mirrored",
        "out/illegal/shifted",
        "out/illegal/rotated5.0",
        "out/illegal/rotated-5.0",
    )
])

# Test frames come from a separate directory tree and are loaded as-is.
legal_test = read_grayscale_pngs("testing/legal")
illegal_test = read_grayscale_pngs("testing/illegal")

In [24]:
# ANN
# Stack both classes into one array and divide by 255.0 (presumably mapping
# 8-bit pixel values into [0, 1] -- confirm against the PNG bit depth).
# Labels follow the stacking order: 0 = legal, 1 = illegal.
X_train = np.concatenate((legal, illegal)) / 255.0
Y_train = np.repeat([0, 1], [len(legal), len(illegal)])

X_test = np.concatenate((legal_test, illegal_test)) / 255.0
Y_test = np.repeat([0, 1], [len(legal_test), len(illegal_test)])


In [None]:
relus = 100

# Reccurent
keras.backend.clear_session()
model = keras.Sequential()

model.add(layers.InputLayer((13,20), name="input"))
model.add(layers.Flatten())
model.add(layers.Dense(relus,  activation="relu"))
model.add(layers.Dense(1,  activation="sigmoid", name="output"))

model.compile(loss="binary_crossentropy",  metrics=["binary_accuracy"])

# dot_img_file = 'tmp/ANN.pdf'
# keras.utils.plot_model(model, to_file=dot_img_file, rankdir="LR")
%time model.fit(X_train, Y_train, shuffle=True, batch_size=20, epochs=20, verbose=0, validation_split=0.1)
%time model.predict(X_test)

In [None]:
conv_filters = 14
kernel_size = 4
relus = 50
dropout = 0.3


# Convolutional
keras.backend.clear_session()
model = keras.Sequential()

model.add(layers.InputLayer((13,20), name="input"))
model.add(layers.Reshape((13,20,1), input_shape=(13,20)))
model.add(layers.Conv2D(conv_filters, kernel_size, input_shape=(13,20,1), activation="relu"))
model.add(layers.Dropout(dropout))
model.add(layers.Flatten())
model.add(layers.Dense(relus,  activation="relu"))
model.add(layers.Dense(1,  activation="sigmoid", name="output"))

model.compile(loss="binary_crossentropy",  metrics=["binary_accuracy"])

%time model.fit(X_train, Y_train, shuffle=True, batch_size=20, epochs=20, verbose=0, validation_split=0.1)
%time model.predict(X_test)

In [None]:
rnn_cells = 50
relu_neurons = 50

# Reccurent
keras.backend.clear_session()
model = keras.Sequential()

model.add(layers.InputLayer((13,20), name="input"))
model.add(layers.Reshape((1,260), input_shape=(13,20)))
model.add(layers.LSTM(rnn_cells))
model.add(layers.Dense(relu_neurons, activation="relu"))
model.add(layers.Dense(1,  activation="sigmoid", name="output"))

model.compile(loss="binary_crossentropy",  metrics=["binary_accuracy"])

%time model.fit(X_train, Y_train, shuffle=True, batch_size=20, epochs=20, verbose=0, validation_split=0.1)
%time model.predict(X_test)

In [25]:
# Reload only the un-augmented frames for the feature-based (shallow) models.
# NOTE: this rebinds `legal` / `illegal`, which earlier held the augmented
# training pools. X_train / Y_train already built from them are unaffected,
# but re-running the ANN data-prep cell after this one would silently train
# on the original frames only.
legal = read_grayscale_pngs("out/legal/orig")
illegal = read_grayscale_pngs("out/illegal/orig")

In [None]:
# Per-image summary statistics reduced over both pixel axes.
pixel_reducers = (
    ("min", np.min),
    ("max", np.max),
    ("mean", np.mean),
    ("var", np.var),
    ("sum", np.sum),
    ("ptp", np.ptp),
    ("std", np.std),
)

# Columns produced by the project's `marginal` helpers:
# (column name, function, dim, meanNN_TF flag).
# NOTE(review): the exact meaning of `dim` and `meanNN_TF` lives in the
# `marginal` module and is not visible from this notebook.
marginal_columns = (
    ("mmeanx", marginal.mean, "x", False),
    ("mmeanxTF", marginal.mean, "x", True),
    ("msdx", marginal.std, "x", False),
    ("msdxTF", marginal.std, "x", True),
    ("mmeany", marginal.mean, "y", False),
    ("mmeanyTF", marginal.mean, "y", True),
    ("msdy", marginal.std, "y", False),
    ("msdyTF", marginal.std, "y", True),
)

# One feature table per image set; target 0 marks legal sets, 1 illegal sets.
features_list = []
for dataset, target in ((legal, 0), (illegal, 1), (legal_test, 0), (illegal_test, 1)):
    columns = {name: reducer(dataset, axis=(1,2)) for name, reducer in pixel_reducers}
    columns["trace"] = np.trace(dataset, axis1=1, axis2=2)
    for name, statistic, dim, use_tf in marginal_columns:
        columns[name] = np.array([statistic(image, dim=dim, meanNN_TF=use_tf) for image in dataset])
    columns["target"] = target
    features_list.append(pd.DataFrame(columns))

legal_features, illegal_features, legal_test_features, illegal_test_features = features_list

# Leave empty to use every feature column; otherwise list the columns to keep.
chosen_features = []

# Training matrix: illegal rows first, then legal rows.
features = pd.concat((illegal_features, legal_features))
if chosen_features:
    X_shallow_train = features[chosen_features]
else:
    X_shallow_train = features.drop('target', axis=1)
Y_shallow_train = features['target']

# Test matrix, assembled in the same order.
features = pd.concat((illegal_test_features, legal_test_features))
if chosen_features:
    X_shallow_test = features[chosen_features]
else:
    X_shallow_test = features.drop('target', axis=1)
Y_shallow_test = features['target']


In [None]:
# Logistic-regression baseline on the hand-crafted feature table, built with
# scikit-learn's default hyperparameters. Rebinds `model`, replacing whichever
# model the previously executed cell left behind.
model = LogisticRegression()
# Time the training step.
%time model.fit(X_shallow_train, Y_shallow_train)

# Time inference on the features derived from the "testing/" images.
%time model.predict(X_shallow_test)

In [None]:
# Support-vector classifier on the same feature table, built with
# scikit-learn's default hyperparameters. Rebinds `model`.
model = SVC()
# Time the training step.
%time model.fit(X_shallow_train, Y_shallow_train)

# Time inference on the features derived from the "testing/" images.
%time model.predict(X_shallow_test)

In [None]:
model = RandomForestClassifier()
%time model.fit(X_shallow_train, Y_shallow_train)

%time model.predict(X_shallow_test)