In [4]:
# Disable warnings from printing
from warnings import filterwarnings
filterwarnings('ignore')

import numpy as np
import pandas as pd

from sklearn.metrics import roc_curve
from scipy.spatial.distance import cityblock, mahalanobis, euclidean
from sknn.mlp import Regressor, Layer

In [5]:
data = pd.read_csv("DSL-StrongPasswordData.csv")

In [6]:
# 51 total
subjects = data["subject"].unique()

In [7]:
def evaluateEER(user_scores, imposter_scores):
    labels = [0]*len(user_scores) + [1]*len(imposter_scores)
    fpr, tpr, thresholds = roc_curve(labels, user_scores + imposter_scores)
    missrates = 1 - tpr
    farates = fpr
    dists = missrates - farates
    idx1 = np.argmin(dists[dists >= 0])
    idx2 = np.argmax(dists[dists < 0])
    x = [missrates[idx1], farates[idx1]]
    y = [missrates[idx2], farates[idx2]]
    a = ( x[0] - x[1] ) / ( y[1] - x[1] - y[0] + x[0] )
    eer = x[0] + a * ( y[0] - x[0] )
    return eer

In [8]:
class NeuralNetAutoAssocDetector:
    
    def __init__(self, subjects):
        self.user_scores = []
        self.imposter_scores = []
        self.subjects = subjects
        self.learning_rate = 0.0001
        self.training_epochs = 500
        self.n_hidden = 31
        self.learning_momentum = 0.0003

    def training(self):
        self.nn = Regressor(
            layers=[
                Layer("Rectifier", units=self.n_hidden),
                Layer("Linear")
                ],
                learning_rate=self.learning_rate,
                learning_momentum=self.learning_momentum,
                n_iter=self.training_epochs
        )
        self.nn.fit(np.array(self.train), np.array(self.train))
        
    def testing(self):
        preds = self.nn.predict(np.array(self.test_genuine))
        for i in range(self.test_genuine.shape[0]):
            self.user_scores.append(np.linalg.norm(self.test_genuine.iloc[i].values - preds[i]))
            
        preds = self.nn.predict(np.array(self.test_imposter))
        for i in range(self.test_imposter.shape[0]):
            self.imposter_scores.append(np.linalg.norm(self.test_imposter.iloc[i].values - preds[i]))       
    
    def evaluate(self):
        eers = []
        
        for subject in subjects:
            
            self.user_scores = []
            self.imposter_scores = []
    
            # Consider current subject as genuine and rest as imposters
            genuine_user_data = data.loc[data.subject == subject, "H.period":"H.Return"]
            imposter_data = data.loc[data.subject != subject, :]
    
            # genuine user's first 200 time vectors for training
            self.train = genuine_user_data[:200]
    
            # True set (200 records)
            self.test_genuine = genuine_user_data[200:]
    
            # False set (250 records, 5 per imposter, 50 imposters in all)
            self.test_imposter = imposter_data.groupby("subject").head(5).loc[:, "H.period":"H.Return"]
            
            self.training()
            
            self.testing()
    
            eers.append(evaluateEER(self.user_scores, self.imposter_scores))
        
        return np.mean(eers), np.std(eers)        

In [9]:
NeuralNetAutoAssocDetector(subjects).evaluate()

(0.16417552306737038, 0.091419929898598581)