In [None]:

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import requests
from io import StringIO
from scipy.spatial.distance import cityblock
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
os.chdir("/content/drive/My Drive")
file = 'DSL-StrongPasswordData.xlsx'
df = pd.read_excel(file)
df.head()

Unnamed: 0,subject,sessionIndex,rep,H.period,UD.period.t,H.t,UD.t.i,H.i,UD.i.e,H.e,...,UD.Shift.r.o,H.o,UD.o.a,H.a,UD.a.n,H.n,UD.n.l,H.l,UD.l.Return,H.Return
0,s002,1,1,0.1491,0.2488,0.1069,0.0605,0.1169,0.1043,0.1417,...,0.6523,0.1016,0.112,0.1349,0.0135,0.0932,0.2583,0.1338,0.2171,0.0742
1,s002,1,2,0.1111,0.234,0.0694,0.0589,0.0908,0.0449,0.0829,...,0.6307,0.1066,0.0618,0.1412,0.1146,0.1146,0.1496,0.0839,0.1917,0.0747
2,s002,1,3,0.1328,0.0744,0.0731,0.056,0.0821,0.0721,0.0808,...,0.5741,0.1365,0.1566,0.1621,0.0711,0.1172,0.1533,0.1085,0.1762,0.0945
3,s002,1,4,0.1291,0.1224,0.1059,0.1436,0.104,0.0998,0.09,...,0.6096,0.0956,0.0574,0.1457,0.0172,0.0866,0.1475,0.0845,0.2387,0.0813
4,s002,1,5,0.1249,0.1068,0.0895,0.0781,0.0903,0.0686,0.0805,...,0.6389,0.043,0.1545,0.1312,0.027,0.0884,0.1633,0.0903,0.1614,0.0818


In [None]:
#This code is meant to find the mean of each column for a specific number of specified rows, N.
import pandas as pd

def calculate_template_means(df, N):
    subjects = df["subject"].unique()
    means = {}

    for subject in subjects:
        subject_df = df[df["subject"] == subject].head(N)
        template_mean = subject_df.loc[:, "H.period":"H.Return"].mean()
        means[subject] = template_mean

    return means




In [None]:
#Calculating genuine scores using Manhattan distance
import pandas as pd
from scipy.spatial.distance import cityblock

def genuine_scores(df, means, N):
    genuine_scores = []
    for subject in df["subject"].unique():
        # Filter the DataFrame for the current subject
        subject_df = df[df["subject"] == subject]
        # Select the last N rows starting from the 4th column onwards
        filtered_df = subject_df.iloc[-N:, 3:]

        # Iterate over the rows of the filtered DataFrame
        for _, row in filtered_df.iterrows():
            # Calculate the Manhattan distance
            manhattan_distance = cityblock(row.values, means[subject])
            # Append the Manhattan distance to the genuine_scores list
            genuine_scores.append(manhattan_distance)

    return genuine_scores

#Calculating impostor scores
#When solving for the impostor scores, we need to find the manhattan distance between the mean vector and the test vectors
#In this case, this means finding the manhattan distance between the mean we calculated with the first N values of each subject, our test vectors will be the remaining N vectors in each subject

def impostor_scores(df, means, N):
    impostor_scores = []

    unique_subjects = df["subject"].unique()

    for subject in unique_subjects:
        # Get the template for the current subject
        template = means[subject]

        # Get test vectors from subjects that are not the current subject
        impostor_subjects = [s for s in unique_subjects if s != subject]
        for impostor_subject in impostor_subjects:
            impostor_data = df[df["subject"] == impostor_subject].iloc[:N, 3:]

            # Iterate over the rows of the impostor data
            for _, row in impostor_data.iterrows():
                score = cityblock(template, row.values)
                impostor_scores.append(score)

    return impostor_scores



In [None]:
#Calculate the Impostor Pass Rate and the False Reject Rate for a given threshold T

def impostor_pass_rate(impostor_scores, threshold):
  pass_count = 0
  for score in impostor_scores:
    if score <= threshold:
      pass_count += 1
      return pass_count / len(impostor_scores)

def false_reject_rate(genuine_scores, threshold):
  reject_count = 0
  for score in genuine_scores:
    if score >= threshold:
      reject_count += 1
      return reject_count / len(genuine_scores)

In [127]:
#Calculate the genuine scores when N = 200
N = 200
means = calculate_template_means(df, N)
genuine_scores_result = genuine_scores(df, means, N)

#Calculate the impostor scores when N = 200
impostor_scores_result = impostor_scores(df, means, N)

print("There are", len(genuine_scores_result), "genuine scores. The lowest is", min(genuine_scores_result), "and the highest is", max(genuine_scores_result))
print("There are", len(impostor_scores_result), "impostor scores. The lowest is", min(impostor_scores_result), "and the highest is", max(impostor_scores_result))

#Threshold values
thresholds = np.linspace(min(genuine_scores_result), max(impostor_scores_result), 5)

print("The five thresholds being used to determine the IPR and FRR are", thresholds)


There are 10200 genuine scores. The lowest is 0.25460649999999996 and the highest is 7.590737999999999
There are 510000 impostor scores. The lowest is 0.2694600000000001 and the highest is 34.629517500000006
The five thresholds being used to determine the IPR and FRR are [ 0.2546065   8.84833425 17.442062   26.03578975 34.6295175 ]


In [130]:
#Impostor pass Rates

for threshold in thresholds:
  ipr = impostor_pass_rate(impostor_scores_result, threshold)
  print(f"Impostor Pass Rate for threshold {threshold}: {ipr}")


for threshold in thresholds:
  frr = false_reject_rate(genuine_scores_result, threshold)
  print(f"False Reject Rate for threshold {threshold}: {frr}")


Impostor Pass Rate for threshold 0.25460649999999996: None
Impostor Pass Rate for threshold 8.84833425: 1.96078431372549e-06
Impostor Pass Rate for threshold 17.442062000000004: 1.96078431372549e-06
Impostor Pass Rate for threshold 26.035789750000006: 1.96078431372549e-06
Impostor Pass Rate for threshold 34.629517500000006: 1.96078431372549e-06
False Reject Rate for threshold 0.25460649999999996: 9.80392156862745e-05
False Reject Rate for threshold 8.84833425: None
False Reject Rate for threshold 17.442062000000004: None
False Reject Rate for threshold 26.035789750000006: None
False Reject Rate for threshold 34.629517500000006: None


In [None]:
#False accept rate at 0 false reject rate, when N = 100, 200, 300

def false_accept_at_zero_frr(genuine_scores_result, impostor_scores_result):
    # Determine the threshold based on maximum genuine score
    max_genuine_score = max(genuine_scores_result)
    threshold = max_genuine_score + 1  # Ensures zero FRR

    # Calculate false accepts
    false_accepts = [val for val in impostor_scores_result if val < threshold]

    # Calculate FAR
    far = len(false_accepts) / len(impostor_scores_result)

    return far, threshold

for N in [100,200,300]:
  means = calculate_template_means(df, N)
  genuine_scores_result = genuine_scores(df, means, N)
  impostor_scores_result = impostor_scores(df, means, N)
  far, threshold = false_accept_at_zero_frr(genuine_scores_result, impostor_scores_result)
  print(f"False Accept Rate at Zero False Reject Rate for" , N,  "at threshold", threshold, "is", far)






False Accept Rate at Zero False Reject Rate for 100 at threshold 9.085073 is 0.9960196078431373
False Accept Rate at Zero False Reject Rate for 200 at threshold 8.590737999999998 is 0.9971803921568627
False Accept Rate at Zero False Reject Rate for 300 at threshold 15.098206000000001 is 0.9998274509803922
