In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from tqdm import tqdm
from itertools import groupby
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB

import myfunctions
import myplot
import mylearn
import myhelpers

# Recording descriptors consumed by getData2 below. Each tuple is
#   (path, sync_adjustment, adjust_missing_samples, name)
# The first three values are forwarded to myfunctions.getData; `name` is a free-text
# label that getData2 stores under d['name'].

# Home, quiet, long measurement, (Space,Return,Back,Tilde), 903 keystrokes with 731 > 0.1
noweb_logitech1 = ('winform/winform-1619400514590', -80, True, 'home,long,4key') # Still lost some samples after about 100 seconds

# Home, quiet, short measurement taken a little while after noweb_logitech1, (Space,Return,Back,Tilde), 42 keystrokes with 19 > 0.1
noweb_logitech2 = ('winform/winform-1619417452127', 70, False, 'home,short,4key')

# NTU lab, long, (Space,Return,Back,Tilde)
noweb_logitech3 = ('winform/winform-1619490434553', 70, False, 'NTU,long,4key')

# NTU lab, short, (Space,Return,Back,Tilde)
noweb_logitech4 = ('winform/winform-1619497973221', 70, False, 'NTU,short,4key')

# NTU lab, short, AC noise, (Space,Return,Back,Tilde)
noweb_logitech5 = ('winform/winform-1619498839675', 70, False, 'NTU,short,4key,AC')

# NTU lab, short, AC noise, typing some text naturally
# NOTE(review): the label below is identical to noweb_logitech5's and still says '4key'
# although this recording is described as natural typing - confirm the intended label.
noweb_logitech6 = ('winform/winform-1619499075929', 70, False, 'NTU,short,4key,AC')


def getData2(x, keystroke_min_peak_level):
    """Load one recording via myfunctions.getData and tag it with its label.

    Parameters
    ----------
    x : tuple
        Recording descriptor, either
        ``(path, sync_adjustment, adjust_missing_samples, name)`` or the same
        without the trailing ``name`` (the label then defaults to ``''``).
    keystroke_min_peak_level
        Forwarded unchanged to ``myfunctions.getData``.

    Returns
    -------
    The object returned by ``myfunctions.getData`` with ``d['name']`` set.

    Side effect: prints ``myhelpers.getListGroupPercentages`` computed over the
    first element of every entry in ``d['keystrokes']``.
    """
    if len(x) == 4:
        *load_args, name = x
    else:
        load_args, name = x, ''
    # Anything that is not exactly three loader values fails here with ValueError.
    path, sync_adjustment, adjust_missing_samples = load_args

    d = myfunctions.getData(
        path,
        sync_adjustment=sync_adjustment,
        adjust_missing_samples=adjust_missing_samples,
        keystroke_min_peak_level=keystroke_min_peak_level,
    )
    d['name'] = name

    first_fields = [keystroke[0] for keystroke in d['keystrokes']]
    print(myhelpers.getListGroupPercentages(first_fields, '\n'))
    return d

# Load all six recordings, in order, with the same keystroke peak threshold (0.05).
d1, d2, d3, d4, d5, d6 = [
    getData2(recording, keystroke_min_peak_level=0.05)
    for recording in (
        noweb_logitech1, noweb_logitech2, noweb_logitech3,
        noweb_logitech4, noweb_logitech5, noweb_logitech6,
    )
]


In [None]:
# Run mylearn.addFeatures on every loaded recording. The return values are not kept, so
# this presumably extends each dict in place (e.g. with the 'mfcc' / 'keystroke_labels'
# entries that getXY reads later) - verify against mylearn.
mylearn.addFeatures(d1)
mylearn.addFeatures(d2)
mylearn.addFeatures(d3)
mylearn.addFeatures(d4)
mylearn.addFeatures(d5)
mylearn.addFeatures(d6)

In [None]:
# Code below copied and modified from https://towardsdatascience.com/siamese-networks-line-by-line-explanation-for-beginners-55b8be1d2fc6

import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os

import tensorflow as tf
import keras
from keras.layers import Input, Conv2D, Lambda, merge, Dense, Flatten,MaxPooling2D
from keras.models import Model, Sequential
from keras.regularizers import l2
from keras import backend as K
from keras.optimizers import SGD,Adam
from keras.losses import binary_crossentropy
from keras.utils import plot_model

In [None]:
def getXY(data):
    """Return the ``(x, y)`` pair of a recording.

    ``x`` is ``data['mfcc']`` passed through untouched; ``y`` is
    ``data['keystroke_labels']`` converted to a numpy array.
    """
    return data['mfcc'], np.asarray(data['keystroke_labels'])

def getCatlist(y):
    """Map every distinct label in ``y`` to the list of positions where it occurs.

    The labels are sorted first (stable sort on the label only), so the
    resulting dict has its keys in sorted order and each index list in
    ascending order.
    """
    by_label = sorted(
        ((label, position) for position, label in enumerate(y)),
        key=lambda pair: pair[0],
    )
    return {
        label: [position for _, position in members]
        for label, members in groupby(by_label, lambda pair: pair[0])
    }


def getTestTrain(data, samples_per_training_key=5, number_of_training_keys=None):
    """Split one recording into a small training set and a held-out-key test set.

    A random subset of the distinct labels is chosen as training keys and a fixed
    number of samples is drawn for each of them. Every sample of every remaining
    key goes into the test set, so training and test keys never overlap.

    Parameters
    ----------
    data : dict
        Recording to split; passed to ``getXY`` (reads ``'mfcc'`` and
        ``'keystroke_labels'``). ``x`` must support indexing with a list of
        integer positions (e.g. a numpy array).
    samples_per_training_key : int
        Number of samples drawn, without replacement, for each training key.
    number_of_training_keys : int or None
        Number of distinct keys used for training. ``None`` falls back to 2,
        the value this function previously hard-coded.

    Returns
    -------
    (x_train, y_train, x_test, y_test)

    Raises
    ------
    ValueError
        From ``random.sample`` when more keys, or more samples of a key, are
        requested than exist.

    Side effect: prints the shapes and key counts of both splits.
    """
    # Use the recording that was passed in, not a notebook-level global.
    x, y = getXY(data)
    catlist = getCatlist(y)

    # random.sample() needs a sequence (sets are rejected from Python 3.11 on);
    # a list of the dict keys also keeps the draw reproducible under random.seed().
    keys = list(catlist)
    if number_of_training_keys is None:
        number_of_training_keys = 2
    keys_train = random.sample(keys, number_of_training_keys)
    keys_test = [key for key in keys if key not in keys_train]

    idxs_train = [
        idx
        for key in keys_train
        for idx in random.sample(catlist[key], samples_per_training_key)
    ]
    x_train = x[idxs_train]
    y_train = y[idxs_train]

    idxs_test = [idx for key in keys_test for idx in catlist[key]]
    x_test = x[idxs_test]
    y_test = y[idxs_test]

    print(f'X&Y shape of training data: {x_train.shape} and {y_train.shape}. Keys: {len(keys_train)}.')
    print(f'X&Y shape of testing data: {x_test.shape} and {y_test.shape}. Keys: {len(keys_test)}.')

    return x_train, y_train, x_test, y_test
# Split recording d1: 2 training keys with 4 samples each, every other key held out for testing.
# The sizes are passed explicitly so the split does not depend on getTestTrain's defaults.
x_train, y_train, x_test, y_test = getTestTrain(d1, samples_per_training_key=4, number_of_training_keys=2)

In [None]:
def getBatch(x, y, batchsize):
    """Build one batch of sample pairs with same/different-class targets.

    Targets: the first ``int(batchsize/2)`` entries are 0 and the rest 1, then
    the vector is shuffled with ``np.random.shuffle``. A target of 0 means both
    members of the pair come from the same key, anything else means the second
    member comes from a different key.

    Returns ``(batch_x, batch_y)`` where ``batch_x[0]`` holds the first member
    of every pair and ``batch_x[1]`` the second. Prints the shapes of both.
    """
    indexes_by_key = getCatlist(y)
    keys = list(indexes_by_key.keys())

    batch_y = (np.arange(batchsize) >= batchsize // 2).astype(float)
    np.random.shuffle(batch_y)

    first_members, second_members = [], []
    for target in batch_y:
        key = random.choice(keys)
        first_members.append(x[random.choice(indexes_by_key[key])])
        if target != 0:
            # Different-class pair: draw the partner from any key except `key`.
            partner_key = random.choice([candidate for candidate in keys if candidate != key])
        else:
            # Same-class pair: the partner comes from the same key.
            partner_key = key
        second_members.append(x[random.choice(indexes_by_key[partner_key])])

    batch_x = np.asarray([first_members, second_members])
    print(f'X&Y shape of batch data: {batch_x.shape} and {batch_y.shape}.')

    return batch_x, batch_y

In [None]:
# Siamese network: one shared convolutional encoder is applied to both inputs and a
# single sigmoid unit scores the difference between the two encodings.
input_shape=(32, 21, 1)
left_input = Input(input_shape)
right_input = Input(input_shape)

W_init = keras.initializers.RandomNormal(mean = 0.0, stddev = 1e-2)
b_init = keras.initializers.RandomNormal(mean = 0.5, stddev = 1e-2)

# Shared encoder. With the default 'valid' padding the shapes are
#   (32,21,1) -> conv (31,18,64) -> pool (15,9,64) -> conv (9,3,128) -> pool (4,1,128) -> flatten (512,)
# The deeper conv stack and the Dense(4096) embedding of the original tutorial are left out.
# The final Flatten is required: without it Dense(1) below is applied along the last axis of
# a 4-D tensor and emits a (4, 1, 1) map per pair instead of one similarity score.
model = keras.models.Sequential([
    keras.layers.Conv2D(64, (2,4), activation='relu', input_shape=input_shape, kernel_initializer=W_init, bias_initializer=b_init, kernel_regularizer=l2(2e-4)),
    keras.layers.MaxPooling2D(2, 2),
    keras.layers.Conv2D(128, (7,7), activation='relu', kernel_initializer=W_init, bias_initializer=b_init, kernel_regularizer=l2(2e-4)),
    keras.layers.MaxPooling2D(2,2),
    keras.layers.Flatten(),
])

encoded_l = model(left_input)
encoded_r = model(right_input)

# NOTE(review): the tutorial this was adapted from merged the encodings with an L1 distance,
# K.abs(x[0] - x[1]). Subtract keeps the sign, so the score depends on input order -
# confirm whether that is intended.
subtracted = keras.layers.Subtract()([encoded_l, encoded_r])
prediction = Dense(1, activation='sigmoid', bias_initializer=b_init)(subtracted)
siamese_net = Model([left_input, right_input], prediction)

optimizer = Adam(learning_rate=0.0006)
siamese_net.compile(loss='binary_crossentropy', optimizer=optimizer)

# NOTE(review): the encoder is also compiled on its own, with a 10x smaller learning rate, and
# `optimizer` is rebound to that second instance. Kept as-is in case a later cell relies on it,
# but the encoder output is not a probability, so this compile looks like a leftover - confirm.
# `learning_rate` replaces the deprecated `lr` alias, matching the call above.
optimizer = Adam(learning_rate=0.00006)
model.compile(loss="binary_crossentropy",optimizer=optimizer)

plot_model(siamese_net, show_shapes=True, show_layer_names=True)