In [1]:
import pandas as pd
import tkinter as tk
from tkinter import *
from tkinter import messagebox
import time
import statistics 
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
import json

In [2]:
"""
To collect: 

1 - number of keys per second
2 - avg time pressing 1 key
3 - stddev of times of pressing 1 key
4 - avg time between pressing 2 keys
5 - stddev of times between pressing 2 keys

"""

'\nTo collect: \n\n1 - number of keys per second\n2 - avg time pressing 1 key\n3 - stddev of times of pressing 1 key\n4 - avg time between pressing 2 keys\n5 - stddev of times between pressing 2 keys\n\n'

In [3]:
# Minimum number of characters a password must contain; shorter entries are rejected
MIN_LENGTH = 8

In [4]:
class User:
    """
    A person whose typing is recorded: credentials, ground-truth flag and data store.
    """

    def __init__ (self, name, password, is_true_user = False):
        """
        Parameters:
        * name: username, also used to derive the database file name
        * password: password this user is expected to type
        * is_true_user: True if entries typed under this object come from the real owner
        """

        # credentials and label
        self.name, self.password = name, password
        self.is_true_user = is_true_user

        # raw events of all entries recorded in this session
        self.entire_database = pd.DataFrame()
        # per-user spreadsheet holding the feature rows
        self.database_name = "".join((name, ".xlsx"))

In [5]:
class PasswordManager:
    """
    Stores and looks up user passwords in the "passwords.json" file.

    The file holds a JSON list of {username: password} dictionaries; every
    dictionary in the list is searched, not only the first one.

    NOTE(review): passwords are kept in plain text - consider hashing them.
    """

    def __init__(self, user):
        self.user = user

    def _load_entries(self):
        """
        Loads all entries of the .json database

        Return:
        * list of stored entries; empty list if the file does not exist yet
        """

        try:
            with open("passwords.json") as json_file:
                data = json.load(json_file)
        except FileNotFoundError:
            return []

        # tolerate a file that holds one bare dictionary instead of a list
        if (isinstance(data, dict)):
            return [data]
        return list(data)

    def _find_entry(self, entries):
        """
        Return:
        * the first dictionary in entries that contains the user's name, None if absent
        """

        for entry in entries:
            if (isinstance(entry, dict) and self.user.name in entry):
                return entry
        return None

    def append_password(self):
        """
        Appends user's password to .json database if not there yet
        """

        entries = self._load_entries()
        if (self._find_entry(entries) is not None):
            print("password already in the database")
            return

        entries.append({self.user.name : self.user.password})
        with open("passwords.json", "w") as json_file:
            json.dump(entries, json_file, indent = 4)


    def get_password(self):
        """
        Fetches user's password from .json database if it's there

        Return:
        * user's password if exists, "-1" if it doesn't
        """

        entry = self._find_entry(self._load_entries())
        if (entry is None):
            print("no such user found")
            return "-1"
        return entry[self.user.name]
            

In [6]:
class DataExtractor:
    """
    Loads the stored feature rows of a user from the user's spreadsheet.
    """

    def __init__ (self, user):
        self.user = user
        self.filename = user.database_name


    def extract(self):
        """
        Fetches password entry data of a user

        Return:
        * (targets, data): targets is the list of values of the "target" column,
          data is the list of rows of all remaining columns (features);
          ([], []) if the file cannot be read or has no "target" column
        """

        try:
            table = pd.read_excel(self.filename)
            targets = table["target"].values.tolist()
            data = table.drop("target", axis = 1).values.tolist()
        except Exception:
            # best effort: any read/format problem means "no usable data",
            # but KeyboardInterrupt / SystemExit are no longer swallowed
            print("no data exist for this user")
            return ([], [])

        return (targets, data)
        

In [7]:
class DataSaver:
    """
    Writes feature rows of password entries to the user's spreadsheet.
    """

    def __init__ (self, user, features):
        self.user = user
        self.features = features
        self.filename = user.database_name

    def create_database(self):
        """
        Creates database for new user
        """

        # leaving the with-block closes the writer, which writes the file;
        # ExcelWriter.save() no longer exists in current pandas
        with pd.ExcelWriter(self.filename, engine = 'xlsxwriter'):
            pass

    def save_to_file(self):
        """
        Saves a row of features of new password entry to user's database

        Rows already stored in the file are kept below the new row.
        """

        new_row = pd.DataFrame(self.features)

        # read the stored rows BEFORE a writer is opened on the same path:
        # opening the writer first can truncate the file and lose the history
        try:
            stored_rows = pd.read_excel(self.filename)
        except Exception:
            # no file yet (or it is unreadable): start the database from this row
            stored_rows = None

        if (stored_rows is None):
            updated_data = new_row
        else:
            updated_data = pd.concat([new_row, stored_rows])

        with pd.ExcelWriter(self.filename, engine = 'xlsxwriter') as writer:
            updated_data.to_excel(writer, sheet_name = "Sheet1", index = False)

        
        

In [8]:
class FeatureExtractor:
    """
    Turns the recorded key events of one password entry into a row of features.

    data must provide the columns "time", "key" and "event"; the features are
    collected in self.features as {column name: [value]}.
    NOTE(review): rows are assumed to be ordered by time - confirm with the caller.
    """

    def __init__ (self, data, user):
        self.data = data
        self.user = user
        self.features = {}

    def extract_features(self):
        """
        Generates all features of password entry and fetches target
        """

        self.get_target()
        self.get_pressing_rate()
        self.avg_stdev_pressing1()
        self.avg_stdev_between_pressing2()


    def get_target(self):
        """
        Fetches target
        """

        self.features["target"] = [1] if self.user.is_true_user else [0]


    def _store_mean_stdev(self, mean_name, stdev_name, values):
        """
        Stores mean and sample stdev of values under the given feature names

        0.0 is stored where the statistic is undefined (no values for the mean,
        fewer than two values for the stdev), so every row has the same columns.
        """

        values = [float(value) for value in values]
        self.features[mean_name] = [statistics.mean(values) if values else 0.0]
        self.features[stdev_name] = [statistics.stdev(values) if len(values) > 1 else 0.0]


    def get_pressing_rate(self):
        """
        Generates feature 1: avg number of keys per second - rate of pressing keys
        """

        data_press = self.data[self.data["event"] == "KeyPress"]
        if (len(data_press) < 2):
            self.features["pressing_rate"] = [0.0]
            return 0.0

        elapsed_time = data_press.iloc[-1]["time"] - data_press.iloc[0]["time"]
        # guard against identical first/last timestamps
        rate = float(len(data_press)) / elapsed_time if elapsed_time > 0 else 0.0

        self.features["pressing_rate"] = [rate]


    def avg_stdev_pressing1(self):
        """
        Generates feature 2: avg time pressing 1 key
        Generates feature 3: stdev of times of pressing 1 key
        """

        # sorted by key and time, events are expected to alternate press, release
        data_keys = self.data.sort_values(by = ["key", "time"])
        if (len(data_keys) % 2 != 0):
            print("we've got uneven pressing/releasing for some reason \n", data_keys)
            self._store_mean_stdev("avg_pressing1", "stdev_pressing1", [])
            return 0

        times = data_keys["time"].tolist()
        # every (even, odd) position is one press/release pair - use ALL pairs
        elapsed_times = [released - pressed
                         for pressed, released in zip(times[0::2], times[1::2])]

        self._store_mean_stdev("avg_pressing1", "stdev_pressing1", elapsed_times)


    def avg_stdev_between_pressing2(self):
        """
        Generates feature 4: avg time between pressing 2 keys
        Generates feature 5: stdev of times between pressing 2 keys
        """

        data_press = self.data[self.data["event"] == "KeyPress"]
        if (len(data_press) < 2):
            self._store_mean_stdev("avg_between_pressing2", "stdev_between_pressing2", [])
            return 0.0

        press_times = data_press["time"].tolist()
        elapsed_times = [later - earlier
                         for earlier, later in zip(press_times, press_times[1:])]

        self._store_mean_stdev("avg_between_pressing2", "stdev_between_pressing2", elapsed_times)
            
            
        

In [9]:
class Recorder:
    """
    Records the timing of key presses and releases typed into a Tk text box.

    purpose is "generate" (accepted entries are saved to the user's database)
    or "check" (the features of the last accepted entry are only returned).
    """

    # TODO:: manage keys like SHIFT etc??

    # columns of the internal event database
    EVENT_COLUMNS = ["time", "key", "event"]

    def __init__ (self, user, purpose):
        self.user = user
        self.keys_used = set()
        self.is_start = True
        self.start_time = 0.0

        self.database = pd.DataFrame(columns = self.EVENT_COLUMNS)
        self.nr_entry = 0

        self.purpose = purpose # generate, check

        self.check_data = []


    def _reset_recording(self):
        """
        Empties the internal database and re-arms the timer for the next entry
        """

        self.database = pd.DataFrame(columns = self.EVENT_COLUMNS)
        self.is_start = True


    def _record_event(self, event, event_name):
        """
        Adds one key event to the internal database

        Parameters:
        * event: Tk key event, its char is stored as the key
        * event_name: "KeyPress" or "KeyRelease"
        """

        # the first event of an entry defines time zero
        if (self.is_start):
            self.start_time = time.time()
            self.is_start = False

        new_event = {"time": time.time() - self.start_time,
                     "key": event.char,
                     "event": event_name}
        new_row = pd.DataFrame([new_event], columns = self.EVENT_COLUMNS)

        # DataFrame.append was removed in pandas 2.0; concat works in all versions
        if (self.database.empty):
            self.database = new_row
        else:
            self.database = pd.concat([self.database, new_row], ignore_index = True)

        self.keys_used.add(event.char)


    def on_key_press(self, event):
        """
        Records the key pressed in textbox and records it in internal database
        """

        self._record_event(event, "KeyPress")


    def on_key_release(self, event):
        """
        Records the key released in textbox and records it in internal database
        """

        # the release of Return only ends an entry, it is not part of the password
        if (event.char == "\r"):
            return

        self._record_event(event, "KeyRelease")

    def get_faulty_indices(self, data):
        """
        Determines indices of wrong recordings in data

        Two neighbouring rows with the same event are a wrong recording:
        of two presses the later one is faulty, of two releases the earlier one.

        Parameters:
        * data: where to search for wrong recordings
        Return:
        * indices in data where wrong recordings detected
        """

        faulty_indices = []

        for i in range(1, len(data)):
            current_event = data.iloc[i]["event"]
            if (current_event != data.iloc[i-1]["event"]):
                continue
            if (current_event == "KeyPress"):
                faulty_indices.append(data.index[i])
            else:
                faulty_indices.append(data.index[i-1])

        return faulty_indices

    def filter_data(self):
        """
        Drops wrong recordings and resets indices in internal database
        """

        # stable sort keeps the recorded order of events with equal timestamps
        self.database = (self.database
                         .sort_values(by = "time", kind = "mergesort")
                         .reset_index(drop = True))

        for key in self.keys_used:
            key_data = self.database[self.database["key"] == key]
            faulty_indices = self.get_faulty_indices(key_data)
            self.database = self.database.drop(faulty_indices)

        self.database = self.database.reset_index(drop = True)

    def _reject_entry(self, password_txt, message):
        """
        Shows an error, clears the textbox and discards the events of the rejected entry
        """

        messagebox.showerror("Error", message)
        password_txt.delete(0, 'end')
        password_txt.focus()
        # without this the keystrokes of the rejected attempt would be mixed
        # into the features of the next accepted entry
        self._reset_recording()

    def on_return(self, window, event, password_txt):
        """
        Checks password in textbox, prepares internal database, saves it to user's database, clears input

        Parameters:
        * window: the recording window (currently unused)
        * event: the Tk event of the Return key (currently unused)
        * password_txt: entry widget holding the typed password
        """

        password = password_txt.get()
        if (len(password) < MIN_LENGTH):
            self._reject_entry(password_txt, "Password should contain at least " + str(MIN_LENGTH) + " characters.")
            return

        # TODO:: compare with .json password instead of user
        if (password != self.user.password):
            self._reject_entry(password_txt, "Entered password is wrong. Try again.")
            return

        self.filter_data()

        print("DATABASE: \n", self.database)

        extractor = FeatureExtractor(self.database, self.user)
        extractor.extract_features()

        if (self.purpose == "generate"):

            manager = DataSaver(self.user, extractor.features)
            manager.save_to_file()

            # tag the raw events with the number of the entry and keep them
            self.database["entry"] = self.nr_entry
            self.nr_entry += 1
            self.user.entire_database = pd.concat([self.user.entire_database, self.database])

        self._reset_recording()

        password_txt.delete(0, 'end')
        password_txt.focus()

        # features without the target, in the shape expected by the classifier
        check_data_df = pd.DataFrame(extractor.features)
        self.check_data = check_data_df.drop("target", axis = 1).values.tolist()


    def record_data(self):
        """
        Creates a window with textbox; initiates and manages password recording

        Blocks until the window is closed.

        Return:
        * feature rows of the last accepted entry, [] if there was none
        """

        window = tk.Tk()
        window.geometry("300x200")
        window.title("Biometrics")

        lbl = tk.Label(window, text = self.purpose + " " + self.user.name, font = ("Arial Bold", 15), padx = 5)
        lbl.grid(column = 0, row = 0, sticky = tk.W+tk.E)

        txt = tk.Entry(window, width = 10)
        txt.grid(column = 0, row = 30, sticky = tk.W+tk.E)
        txt.focus()

        window.bind("<KeyPress>", self.on_key_press)
        window.bind("<KeyRelease>", self.on_key_release)
        window.bind("<Return>", lambda event: self.on_return(window, event, txt))

        window.mainloop()

        return self.check_data

In [10]:
class Predictor:
    """
    Trains an SVM on a user's stored feature rows and applies it to new entries.
    """

    def __init__(self, data, target, user):
        self.data = data
        self.target = target
        self.user = user

    def _can_train(self):
        """
        Checks that the stored data can be used to fit a classifier

        Return:
        * True if there are samples of at least two classes, False otherwise
        """

        if (len(self.data) == 0 or len(self.target) == 0):
            print("no training data available for this user")
            return False

        # the classifier cannot be fitted on samples of one class only
        if (len(set(self.target)) < 2):
            print("training data must contain entries of both the true user and other persons")
            return False

        return True

    def predict(self):
        """
        Predict if true user by password entry in textbox using SVM
        """

        if (not self._can_train()):
            return

        X_train = self.data
        y_train = self.target

        clf = svm.SVC(kernel = "rbf") # RBF Kernel
        clf.fit(X_train, y_train)

        recorder = Recorder(self.user, "check")
        X_test = recorder.record_data()

        print("X_test = ", X_test)

        # the window was closed before a valid password entry was made
        if (len(X_test) == 0):
            print("no password entry was recorded")
            return

        y_pred = clf.predict(X_test)

        print("*** PREDICTED = ", y_pred)


    def evaluate(self):
        """
        Evaluates data in user's database using SVM
        """

        if (not self._can_train()):
            return

        X_train, X_test, y_train, y_test = train_test_split(self.data, self.target, test_size = 0.3, random_state = 109) # 70% training and 30% test

        clf = svm.SVC(kernel = "rbf") # RBF Kernel
        clf.fit(X_train, y_train)

        y_pred = clf.predict(X_test)

        print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
        print("Precision:", metrics.precision_score(y_test, y_pred))
        print("Recall:", metrics.recall_score(y_test, y_pred))
        

In [11]:
def execute():
    """
    Runs one interactive session: asks for credentials and either records new
    password entries ("generate") or checks a password entry ("check").
    """

    name = input("Please enter username: ")
    password = input("Please enter password: ")

    # TODO:: maybe check with .json here

    if (len(password) < MIN_LENGTH):
        print("Error", "Password should contain at least " + str(MIN_LENGTH) + " characters.")
        return

    purpose = input("Do you want to generate data or check password?\n" +
                    "Enter 1 for generate, 2 for check: ")

    if (purpose == "1"): # generate
        is_true_person_number = input("Are you the real user?\n" +
                                      "Enter 1 for yes, 0 for no: ")
        is_true_person = (is_true_person_number == "1")

        user = User(name, password, is_true_person)

        # only the real user may register the password
        if (is_true_person):
            password_manager = PasswordManager(user)
            password_manager.append_password()

        recorder = Recorder(user, "generate")
        recorder.record_data()

    elif (purpose == "2"): # check

        user = User(name, password)

        password_manager = PasswordManager(user)
        true_password = password_manager.get_password()
        if (password != true_password):
            print("Entered password is wrong.")
            return

        extractor = DataExtractor(user)
        target, data = extractor.extract()

        # extract() returns empty lists when the user has no stored entries;
        # there is nothing to train the classifier on in that case
        if (len(data) == 0 or len(target) == 0):
            print("Nothing to check: generate data for this user first.")
            return

        predictor = Predictor(data, target, user)
        predictor.evaluate()
        predictor.predict()

    else:
        print("Unknown choice: enter 1 for generate or 2 for check.")
    

In [12]:
# Run the interactive session (prompts on stdin; may open the Tk recording window)
execute()

Please enter username: Mariia
Please enter password: ilovepuppies239
Do you want to generate data or check password?
Enter 1 for generate, 2 for check: 1
Are you the real user?
Enter 1 for yes, 0 for no: 1
password already in the database
DATABASE: 
         time key       event
0   0.000001   i    KeyPress
1   0.003888   i  KeyRelease
2   0.124771   l    KeyPress
3   0.127999   l  KeyRelease
4   0.260784   o    KeyPress
5   0.263679   o  KeyRelease
6   0.461093   v    KeyPress
7   0.463341   v  KeyRelease
8   0.561216   e    KeyPress
9   0.565109   e  KeyRelease
10  0.769401   p    KeyPress
11  0.773576   p  KeyRelease
12  1.225310   u    KeyPress
13  1.229421   u  KeyRelease
14  1.555250   p    KeyPress
15  1.559410   p  KeyRelease
16  1.654580   p    KeyPress
17  1.658153   p  KeyRelease
18  1.820008   i    KeyPress
19  1.824030   i  KeyRelease
20  1.907263   e    KeyPress
21  1.910537   e  KeyRelease
22  2.079510   s    KeyPress
23  2.083667   s  KeyRelease
24  2.265612   2    KeyP