In [26]:
from sklearn import svm
from sklearn import cross_validation
from sklearn import preprocessing
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv('../data/DSL-StrongPasswordData.csv', sep = ',')

df = df.drop(['sessionIndex','rep'],1)

Y = df['subject'].apply(lambda x: int(x[-2:]))

X = df.drop('subject',1)
X = pd.DataFrame(preprocessing.scale(X))

X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=42)

In [15]:
def score(clf,X_test,Y_test):
    
    count = 0
    score = 0

    for i in X_test.index.values:
        y_hat = clf.predict(X_test.ix[i].reshape(1,-1))
        if y_hat == Y_test[i]:
            score += 1
        count += 1
    
    return round(score/count*100,2)

In [16]:
clf = svm.SVC()
clf.fit(X_train,Y_train)

print('This initial SVM classifier is ' + str(score(clf,X_test,Y_test)) + '% accurate')

This initial SVM classifier is 88.14% accurate


In [17]:
results = []

for i in [1,25,50,75,100]:
    print(i)
    results.append(score(svm.SVC(C = i).fit(X_train,Y_train),X_test,Y_test))
    
results

1
25
50
75
100


[88.14, 90.12, 90.0, 89.9, 89.95]

In [18]:
# test different kernels

for i in ['linear','rbf','poly','sigmoid']:
    print(i + ': ' + str(score(svm.SVC(kernel = i).fit(X_train,Y_train),X_test,Y_test)) + '%')

linear: 84.85%
rbf: 88.14%
poly: 73.73%
sigmoid: 1.32%


In [19]:
def score_complex(clf,X_test,Y_test):
    
    count = 0
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    
    user = Y_test.value_counts()[1]
    imposter = Y_test.value_counts()[0]

    for i in X_test.index.values:
        
        y_hat = clf.predict(X_test.ix[i].reshape(1,-1))
        
        if Y_test[i] == 1 and y_hat == 1:
            tp += 1
        if Y_test[i] == 1 and y_hat == 0:
            fn += 1
        if Y_test[i] == 0 and y_hat == 1:
            fp += 1
        if Y_test[i] == 0 and y_hat == 0:
            tn += 1
        
        count += 1
    
    return (round(tp/user*100,2),round(tn/imposter*100,2),round(fp/imposter*100,2),round(fn/user*100,2))

In [20]:
values = Y.values

true_positives = []
false_negatives = []
true_negatives = []
false_negatives = []

for i in [x for x in range(2,58) if x in values]:

    Y = df['subject'].apply(lambda x: int(x[-2:])).apply(lambda x: 1 if x == i else 0)

    X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.2, random_state=42)

    clf = svm.SVC()
    clf.fit(X_train,Y_train)

    scores = score_complex(clf,X_test,Y_test)
    
    true_positives.append(scores[0])
    false_negatives.append(scores[3])
    true_negatives.append(scores[1])
    false_negatives.append(scores[2])

    print('SUBJECT' + str(i) + ': \n True-Positive = ' + str(scores[0]) + '% \n False-Negative = ' + str(scores[3]) + '% \n True-Negative = ' + str(scores[1]) + '% \n False-Positive = ' + str(scores[2]) + '%')

SUBJECT2: 
 True-Positive = 51.39% 
 False-Negative = 48.61% 
 True-Negative = 99.95% 
 False-Positive = 0.05%
SUBJECT3: 
 True-Positive = 59.42% 
 False-Negative = 40.58% 
 True-Negative = 99.93% 
 False-Positive = 0.07%
SUBJECT4: 
 True-Positive = 67.12% 
 False-Negative = 32.88% 
 True-Negative = 100.0% 
 False-Positive = 0.0%
SUBJECT5: 
 True-Positive = 83.56% 
 False-Negative = 16.44% 
 True-Negative = 99.93% 
 False-Positive = 0.07%
SUBJECT7: 
 True-Positive = 42.59% 
 False-Negative = 57.41% 
 True-Negative = 99.98% 
 False-Positive = 0.02%
SUBJECT8: 
 True-Positive = 49.43% 
 False-Negative = 50.57% 
 True-Negative = 99.97% 
 False-Positive = 0.03%
SUBJECT10: 
 True-Positive = 84.54% 
 False-Negative = 15.46% 
 True-Negative = 99.95% 
 False-Positive = 0.05%
SUBJECT11: 
 True-Positive = 78.67% 
 False-Negative = 21.33% 
 True-Negative = 99.95% 
 False-Positive = 0.05%
SUBJECT12: 
 True-Positive = 75.27% 
 False-Negative = 24.73% 
 True-Negative = 99.97% 
 False-Positive = 0.03%

In [29]:
plt.figure()
sns.boxplot([true_positives, false_negatives])
plt.show()

TypeError: unsupported operand type(s) for /: 'list' and 'int'