# Project 3
## Signature Verification

## Task 1: Preprocess the data into the format required by the spec

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sys import stdout

# example_path = 'enrollment/001-g-01.txt'

# Accept as input a number of one user as string, e.g '001'
# Output format as [[userId, [[x1, y1, vx1, vy1, pressure1], [x2, y2, vx2, vy2, pressure2], [...], ...]]]
# -> userId = '001-g-01' (user 001 genuine signature 01) -> one user has 5 genuine signatures
# To get the array with all the signature data for dtw -> output[i][1]
def preprocess_enrollment(usernumber):
    """Load and preprocess the five genuine (enrollment) signatures of a user.

    Files are read from ``enrollment/<usernumber>-g-0<k>.txt`` for k = 1..5.
    Every line of a file is split on whitespace and kept as one raw sample.

    Parameters
    ----------
    usernumber : str
        User number, e.g. ``'001'``.

    Returns
    -------
    list
        The result of ``preprocess_data`` applied to the entries
        ``[signature_id, raw_samples]``, where ``signature_id`` looks like
        ``'001-g-01'``.
    """
    raw = []

    for index in range(1, 6):
        signature_id = str(usernumber) + '-g-0' + str(index)
        with open('enrollment/' + signature_id + '.txt', "r") as handle:
            samples = [line.split() for line in handle]
        raw.append([signature_id, samples])

    return preprocess_data(raw)


# Accept as input a number of one user as string, e.g '001'
# Output format as [[userId, [[x1, y1, vx1, vy1, pressure1], [x2, y2, vx2, vy2, pressure2], [...], ...]]]
# -> userId = '001-01' (user 001 signature 01 for verification) -> one user has 45 signatures
# To get the array with all the signature data for dtw -> output[i][1]
def preprocess_verification(usernumber):
    """Load and preprocess the 45 verification signatures of a user.

    Files are read from ``verification/<usernumber>-<kk>.txt`` where ``kk``
    is the two-digit, zero-padded signature number 01..45. Every line of a
    file is split on whitespace and kept as one raw sample.

    Parameters
    ----------
    usernumber : str
        User number, e.g. ``'001'``.

    Returns
    -------
    list
        The result of ``preprocess_data`` applied to the entries
        ``[signature_id, raw_samples]``, where ``signature_id`` looks like
        ``'001-01'``.
    """
    raw = []

    for number in range(1, 46):
        # zfill(2) yields the same ids as the explicit "< 10" padding branch.
        signature_id = str(usernumber) + '-' + str(number).zfill(2)
        with open('verification/' + signature_id + '.txt', "r") as handle:
            samples = [line.split() for line in handle]
        raw.append([signature_id, samples])

    return preprocess_data(raw)


def preprocess_data(data):
    """Turn raw signature samples into the feature vectors used for DTW.

    Parameters
    ----------
    data : list
        Entries of the form ``[signature_id, rows]``. The first five items
        of every row are parsed with ``float`` as time, x, y, pressure and
        pen-up flag, in that order.

    Returns
    -------
    list
        One entry ``[signature_id, [[x, y, vx, vy, pressure], ...]]`` per
        input entry, in input order.

    Notes
    -----
    * ``vx`` / ``vy`` are finite differences against the previous sample.
      They are set to zero when the pen-up flag equals 1, and also when no
      time elapsed since the previous sample (this used to raise
      ``ZeroDivisionError``).
    * The state before the first sample is ``x = y = time = 0``, so the
      velocity of a first pen-down sample is measured from the origin. This
      is kept unchanged so existing distances stay comparable.
    * Empty rows (e.g. produced by blank lines in the input file) are
      skipped instead of raising ``IndexError``.
    """
    data_for_dtw = []

    for user in data:
        prev_x = 0.0
        prev_y = 0.0
        prev_time = 0.0

        input_data = []
        for signature_data in user[1]:
            if not signature_data:
                continue

            new_time = float(signature_data[0])
            new_x = float(signature_data[1])
            new_y = float(signature_data[2])
            pressure = float(signature_data[3])
            penup = float(signature_data[4])

            elapsed = new_time - prev_time
            if penup == 1 or elapsed == 0:
                # No meaningful velocity: the pen is lifted or time stood still.
                vx = 0.0
                vy = 0.0
            else:
                vx = (new_x - prev_x) / elapsed
                vy = (new_y - prev_y) / elapsed

            input_data.append([new_x, new_y, vx, vy, pressure])
            prev_x, prev_y, prev_time = new_x, new_y, new_time

        data_for_dtw.append([user[0], input_data])

    return data_for_dtw

In [2]:
# Smoke test: load and preprocess the enrollment signatures of user '001'.
test_enrollment = preprocess_enrollment('001')

In [3]:
# Smoke test: load and preprocess the verification signatures of user '001'.
test_verification = preprocess_verification('001')

## Task 2: Dynamic Time Warping for Signatures
Take two arrays in the format of [[x1, y1, vx1, vy1, pressure1], [x2, y2, vx2, vy2, pressure2], [...], ...] and compute the dtw distance

In [4]:
from dtw import dtw
import numpy as np

def euclidean(img1, img2):
    """Return the Euclidean distance between two feature vectors.

    Computed as the square root of the summed squared element-wise
    differences of ``img1`` and ``img2``.
    """
    difference = img1 - img2
    return np.sqrt(np.sum(difference ** 2))

def dynamic_time_warping(img1, img2):
    """Return the DTW distance between two signature feature sequences.

    Both inputs are converted with ``np.asarray`` and handed to ``dtw``
    together with ``euclidean`` as the per-sample distance. Only the first
    element of the 4-tuple returned by ``dtw`` is passed on to the caller.
    """
    sequence_a = np.asarray(img1)
    sequence_b = np.asarray(img2)
    distance, _cost, _acc_cost, _path = dtw(sequence_a, sequence_b, dist=euclidean)
    return distance

In [5]:
# DTW distance between the first enrollment and the first verification signature loaded above.
dynamic_time_warping(test_enrollment[0][1], test_verification[0][1])

80.91870617782547

## Task 3: Apply and train 
Take all verification signatures of one user and compare each of them (with DTW) against the 5 genuine signatures of that user. Return the user number, the signature ID (verification) and the smallest DTW distance (dissimilarity) among the 5 DTW results.

Output for the competition:   
Store results in a file as one line per user ->   
user1, signature_ID11, dissimilarity11, signature_ID12, dissimilarity12, ...  
user2, signature_ID21, dissimilarity21, signature_ID22, dissimilarity22, ...  
e.g ->  
051, 46, 6.40341144, 21, 7.62949846, 17, 9.18516724, 03, 10.47132116, […]  
043, 02, 0.99152807, 22, 4.82357323, 14, 2.14435743, 42, 5.05044537, […]  
[…]

In [14]:
# results = e.g. [['001-01', dissimilarity1], ['001-02', dissimilarity2], ...]
def signature_classifier(usernumber):
    """Score every verification signature of a user against the enrollment set.

    Each verification signature is compared with DTW against all enrollment
    signatures of the same user; its dissimilarity is the smallest of those
    distances. Progress is reported on a single, continuously overwritten
    ``stdout`` line.

    Parameters
    ----------
    usernumber : str
        User number, e.g. ``'001'``.

    Returns
    -------
    list
        ``[[signature_id, dissimilarity], ...]`` with one entry per
        verification signature, in the order they were loaded.
    """
    results = []
    signatures_enrollment = preprocess_enrollment(usernumber)
    signatures_verification = preprocess_verification(usernumber)
    message = ""
    for signature_v in signatures_verification:
        last_message = message
        message = "Processing signature " + signature_v[0] + " (Size: " + str(len(signature_v[1])) + ")"
        # Jump back to the start of the line and pad to the previous message's
        # length so leftovers of a longer earlier message are blanked out.
        stdout.write("\r" + message.ljust(len(last_message)))
        stdout.flush()

        distances = [
            dynamic_time_warping(signature_v[1], signature_e[1])
            for signature_e in signatures_enrollment
        ]
        # The dissimilarity is the distance to the closest genuine sample.
        results.append([signature_v[0], min(distances)])

#    with open('signature_classifier_results.txt', 'a+') as f:
#        f.write(usernumber)
#        for x in results:
#            id = (x[0].split("-"))[1]
#            f.write(", " + str(id) + ", " + str(x[1]))
#        f.write("\n")   

    return results

## Task 4: Combine everything and run
There are 30 users. Each user has 45 signatures for verification (validation data) and 5 genuine signatures for comparing (enrollment, ground-truth, train data).  

Verify all 45 verification signatures of one user. Label the 20 signatures with the smallest distances as genuine and the remaining 25 signatures as forgeries. Compare usernumber + id against gt.txt to calculate the precision.
Repeat for all 30 users and compute the average precision in the end.

In [15]:
# File that lists the user numbers to evaluate (passed to read_user).
user_path = 'users.txt'
# File that holds the ground-truth label per signature (passed to read_gt).
gt_path = 'gt.txt'
def read_user(user_path):
    """Read the user numbers listed in a text file, one per line.

    Surrounding whitespace is stripped and blank lines are ignored, so a
    trailing empty line no longer produces an empty user number.

    Parameters
    ----------
    user_path : str
        Path of the file to read.

    Returns
    -------
    list of str
        The user numbers in file order.
    """
    with open(user_path, "r") as lines:
        stripped = (line.strip() for line in lines)
        return [user for user in stripped if user]

def read_gt(gt_path):
    """Read the ground-truth labels from a whitespace-separated text file.

    The first token of every non-blank line is used as the key and the
    second token as its value. Blank lines are skipped instead of raising
    ``IndexError``.

    Parameters
    ----------
    gt_path : str
        Path of the file to read.

    Returns
    -------
    dict
        Mapping of first token to second token for every non-blank line.

    Raises
    ------
    IndexError
        If a non-blank line holds fewer than two tokens.
    """
    gts = {}
    with open(gt_path, "r") as lines:
        for line in lines:
            tokens = line.split()
            if not tokens:
                continue
            gts[tokens[0]] = tokens[1]
    return gts
    

In [16]:
def compare(results, gt, n_genuine=20):
    """Compute the precision of the best-ranked signatures of one user.

    ``results`` is sorted in place by ascending dissimilarity; the first
    ``n_genuine`` entries are predicted to be genuine. The precision is the
    share of those predictions whose ground-truth label equals ``'g'``.

    Parameters
    ----------
    results : list
        ``[[signature_id, dissimilarity], ...]``; sorted in place.
    gt : dict
        Maps each signature id to its label; ``'g'`` marks a genuine one.
    n_genuine : int, optional
        Number of top-ranked signatures predicted as genuine (default 20).

    Returns
    -------
    float
        True positives divided by the number of predicted-genuine
        signatures, or ``0.0`` when there is nothing to rank.
    """
    results.sort(key=lambda x: x[1])
    predicted_genuine = results[:n_genuine]
    if not predicted_genuine:
        return 0.0

    # Labels are compared with ==; identity ("is") on strings is unreliable.
    true_positives = sum(
        1 for signature in predicted_genuine if gt[signature[0]] == 'g'
    )
    # Divide by the number of predictions actually made, so that fewer than
    # n_genuine available signatures do not deflate the precision.
    return float(true_positives) / len(predicted_genuine)

In [17]:
# Evaluate every user listed in user_path against the labels in gt_path.
usernumbers = read_user(user_path)
ground_truths = read_gt(gt_path)
total_precision = 0
for usernumber in usernumbers:
    # One [signature_id, dissimilarity] pair per verification signature of this user.
    results = signature_classifier(usernumber)
    # Precision over the 20 signatures with the smallest dissimilarity.
    precision = compare(results, ground_truths)
    total_precision += precision
    
# Mean of the per-user precisions; raises ZeroDivisionError if no users were read.
average_precision = total_precision/len(usernumbers)
    
print('Mean average-precision: ' + str(average_precision))

Processing signature 002-03 (Size: 510)

KeyboardInterrupt: 