In [9]:
import torch
import numpy as np
import os
import scipy.io as io
import shutil

from PIL import Image
from sigver.featurelearning.models import SigNet
from sklearn import svm
from torch.utils.data import Dataset
from torchvision import transforms


class SignatureDataset(Dataset):
    """Signatures dataset with one item per author folder.

    ``root_dir`` is expected to contain one sub-directory per person; every
    sub-directory holds that person's signature images.  Indexing the dataset
    returns a dict with the keys:

        'signatures': tensor made by stacking every image of the person
                      (each image is reshaped to ``(-1, 150, 220)`` first),
        'names':      list of image file names without extension, in the
                      same order as 'signatures',
        'id':         the person's folder name.
    """

    # Lower-case file extensions that are treated as signature images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.tif')

    def __init__(self, root_dir,
                 transform=transforms.Compose([transforms.Resize((150, 220)),
                                               transforms.ToTensor()])):
        """
        Args:
            root_dir (string): Directory with the folders containing signatures of each person.
            transform (callable): Applied to every loaded PIL image. It must
                return a tensor that can be viewed as ``(-1, 150, 220)``.
        """
        self.root_dir = root_dir
        # os.walk yields sub-directories in arbitrary, OS-dependent order;
        # sorting keeps index -> person stable across runs and machines.
        self.people = sorted(next(os.walk(root_dir))[1])
        self.transform = transform

    def __len__(self):
        """Return the number of person folders found under ``root_dir``."""
        return len(self.people)

    def __getitem__(self, idx):
        """Load every signature image of the person at position ``idx``.

        Raises:
            IndexError: if ``idx`` is out of range (this is also what ends a
                plain ``for item in dataset`` loop).
            RuntimeError: if the person's folder contains no usable image.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        person = self.people[idx]
        path = os.path.join(self.root_dir, person)
        sample = {'signatures': [], 'names': [], 'id': person}

        # Sorted for the same reason as self.people: os.listdir order is arbitrary.
        for f in sorted(os.listdir(path)):
            name, ext = os.path.splitext(f)
            if ext.lower() not in self.IMAGE_EXTENSIONS:
                continue

            # Skip hidden files (e.g. '._xyz.png' resource forks).
            if name.startswith("."):
                continue

            # convert() loads the pixel data into a new image, so the file
            # handle can be released as soon as the conversion is done.
            with Image.open(os.path.join(path, f)) as raw:
                im = raw.convert('1')
            if self.transform:
                im = self.transform(im)
            im = im.view(-1, 150, 220)
            sample['signatures'].append(im)
            sample['names'].append(name)

        if not sample['signatures']:
            # torch.stack([]) would raise a RuntimeError with an opaque
            # message; keep the exception type but say which folder is empty.
            raise RuntimeError("no signature images found in '%s'" % path)

        sample['signatures'] = torch.stack(sample['signatures'])
        return sample



def OneClassSVM(x_train, x_test, nu=0.1, kernel="rbf", gamma=2**-11):
    """Fit a one-class SVM on ``x_train`` and classify ``x_test``.

    Args:
        x_train: Training samples passed to ``fit``.
        x_test: Samples to classify.
        nu, kernel, gamma: Hyper-parameters forwarded to
            ``sklearn.svm.OneClassSVM``. The defaults are the values this
            function previously hard-coded.

    Returns:
        A 4-tuple ``(Y_test, Y_train, n_error_train, Y_prob)``:
            Y_test: predictions for ``x_test`` (+1 inlier, -1 outlier).
            Y_train: predictions for ``x_train``.
            n_error_train: number of training samples predicted as -1.
            Y_prob: ``decision_function`` output for ``x_test``. Despite the
                name this is a signed score, not a probability.
    """
    # Fit SVM model
    clf = svm.OneClassSVM(nu=nu, kernel=kernel, gamma=gamma)
    clf.fit(x_train)

    # Calculate model error: training samples rejected by their own model
    Y_train = clf.predict(x_train)
    n_error_train = int(np.count_nonzero(Y_train == -1))

    # Predict results
    Y_test = clf.predict(x_test)
    Y_prob = np.array(clf.decision_function(x_test))
    return Y_test, Y_train, n_error_train, Y_prob

# Load the model. The checkpoint unpacks into three objects; only the first
# (the state dict) is loaded into the network here.
# map_location='cpu': the network is evaluated on the CPU (its output is
# converted with .numpy() further down), so tensors saved from a GPU must be
# remapped or torch.load fails on a machine without CUDA.
state_dict, classification_layer, forg_layer = torch.load(
    'models/sabourin/signet.pth', map_location='cpu')
# eval() puts the network in inference mode before the weights are loaded.
net = SigNet().eval()
net.load_state_dict(state_dict)


# Extract a SigNet feature vector for every signature of every author, for
# both the reference ('trainset') and the questioned ('testset') folders.
root_dir = 'datasets/Signatures/CustomDataset/'
print('INPUT:', root_dir)

# The trailing '' keeps the trailing separator, so the values are unchanged.
train_dir = os.path.join(root_dir, 'Ref', '')
test_dir = os.path.join(root_dir, 'Questioned', '')

# datasets[split][author] -> {'X': features, 'y': labels, 'ids': file names}
datasets = {'trainset': dict(), 'testset': dict()}
dirs = [('trainset', train_dir), ('testset', test_dir)]
for name, in_dir in dirs:
    print('\nanalysing', name.upper(), '...')

    # Both splits are resized and converted to tensors; only the reference
    # set gets a small random rotation in between.
    # NOTE(review): RandomRotation makes the reference features (and so the
    # reported scores) vary from run to run unless the RNG is seeded.
    steps = [transforms.Resize((150, 220))]
    if name != 'testset':
        steps.append(transforms.RandomRotation(2))
    steps.append(transforms.ToTensor())
    t = transforms.Compose(steps)

    dataset = SignatureDataset(in_dir, t)
    print('found', len(dataset), 'authors')
    print('extracting feature vectors ...')
    for person in dataset:
        s = person['signatures']
        fn = person['id']
        ids = person['names']

        # Extract features
        with torch.no_grad(): # We don't need gradients. Inform torch so it doesn't compute them
            features = net(s).numpy()
        X = features

        # Labelling convention used by this script: a file whose name contains
        # the letter 'f' (any case) is a forgery (-1); everything else is
        # genuine (+1).
        y = [-1 if 'f' in sig_name.lower() else 1 for sig_name in ids]
        G = y.count(1)

        print("\t(%s) found %d signatures (of which genuine: %d)" % (fn, len(s), G))
        datasets[name][fn] = {'X': X, 'y': y, 'ids': ids}

# Per-author verification: fit a one-class SVM on the author's reference
# features, classify the questioned signatures, and collect statistics.
trainset = datasets['trainset']
testset = datasets['testset']

right = 0
wrong = 0
false_pos = 0
false_neg = 0
accuracy = 0
stats = dict()
print('\ngetting predictions...')
for k in trainset:
    # Only authors present in both the reference and the questioned set
    # can be evaluated.
    if k not in testset:
        continue

    X_train = trainset[k]['X']
    X_test = testset[k]['X']
    y_true = testset[k]['y']
    names = testset[k]['ids']

    # Get predictions from OneClassSVM
    y_test, y_train, n_error_train, y_prob = OneClassSVM(X_train, X_test)

    # Save stats. y_true is a plain list; convert it explicitly so the
    # comparison is element-wise by construction.
    pred = np.asarray(y_true) == y_test
    mistakes = [i for i, ok in enumerate(pred) if not ok]

    fpos = []
    fneg = []
    for mistake in mistakes:
        if y_true[mistake] == 1 and y_test[mistake] == -1:
            # genuine signature rejected
            fneg.append(names[mistake])
        else:
            # forgery accepted
            fpos.append(names[mistake])

    n_right = int(np.count_nonzero(pred))
    n_wrong = int(pred.size) - n_right
    stats[k] = {
        'right': n_right,
        'wrong': n_wrong,
        'false-pos': fpos,
        'false-neg': fneg,
        'accuracy': n_right/len(y_test)*100
    }

    right += stats[k]['right']
    wrong += stats[k]['wrong']
    false_pos += len(stats[k]['false-pos'])
    false_neg += len(stats[k]['false-neg'])
    accuracy += stats[k]['accuracy']

    # NOTE(review): both rates below are fractions of ALL questioned
    # signatures of the author, not of the forgeries / genuine ones only,
    # which is how FAR / FRR are usually defined - confirm this is intended.
    n_tested = stats[k]['right'] + stats[k]['wrong']
    print("\t%s: %.02f%% accuracy (FAR: %.2f, FRR: %.2f)" %
          (k, stats[k]['accuracy'],
           len(stats[k]['false-pos'])/n_tested*100,
           len(stats[k]['false-neg'])/n_tested*100))

# Guard the averages: with no author common to both sets the original
# divisions raised ZeroDivisionError instead of printing an empty report.
n_users = len(stats)
n_total = right + wrong
if n_users:
    accuracy /= n_users

print('\nSTATS')
print("\t# of Users:", n_users)
print("\tTestset Size:", n_total)
print("\tAverage Accuracy:", round(accuracy,2), "%")
print("\tFalse Positives:", round(false_pos / n_total * 100, 2) if n_total else 0.0, "%")
print("\tFalse Negatives:", round(false_neg / n_total * 100, 2) if n_total else 0.0, "%")

INPUT: datasets/Signatures/CustomDataset/

analysing TRAINSET ...
found 7 authors
extracting feature vectors ...
	(007) found 40 signatures (of which genuine: 40)
	(000) found 40 signatures (of which genuine: 40)
	(009) found 40 signatures (of which genuine: 40)
	(008) found 40 signatures (of which genuine: 40)
	(001) found 40 signatures (of which genuine: 40)
	(004) found 40 signatures (of which genuine: 40)
	(005) found 40 signatures (of which genuine: 40)

analysing TESTSET ...
found 7 authors
extracting feature vectors ...
	(007) found 80 signatures (of which genuine: 40)
	(000) found 40 signatures (of which genuine: 40)
	(009) found 80 signatures (of which genuine: 40)
	(008) found 80 signatures (of which genuine: 40)
	(001) found 80 signatures (of which genuine: 40)
	(004) found 40 signatures (of which genuine: 40)
	(005) found 80 signatures (of which genuine: 40)

getting predictions...
	007: 85.00% accuracy (FAR: 2.50, FRR: 12.50)
	000: 77.50% accuracy (FAR: 0.00, FRR: 22.50)
	