## PRE-PROCESSING AND CALCULATION OF ATTRIBUTES

In [1]:
# This small sample data set contains signature data collected from
# five users.  For each user, there are 20 genuine signatures and
# 20 skilled forgeries.

# Each genuine/forgery signature is stored in a separate text file.
# The file names are in the format "USERx_y.txt", where x (1..5)
# indicates the user and y (1..40) indicates one signature instance
# of the corresponding user, with the first 20 (1..20) representing
# genuine signatures and the rest (21..40) representing skilled
# forgeries provided by the other users. 

# In each text file, the signature is simply represented as a
# sequence of points.  The first line stores a single integer which
# is the total number of points in the signature.  Each of the
# subsequent lines corresponds to one point characterized by seven
# features listed in the following order:

#   X-coordinate  - scaled cursor position along the x-axis
#   Y-coordinate  - scaled cursor position along the y-axis
#   Time stamp    - system time at which the event was posted
#   Button status - current button status (0 for pen-up and
#                   1 for pen-down)
#   Azimuth       - clockwise rotation of cursor about the z-axis
#   Altitude      - angle upward toward the positive z-axis
#   Pressure      - adjusted state of the normal pressure

## Library Imports

In [3]:
import pandas as pd
import numpy as np
import math
import os

## Variable Declarations

In [4]:
# Name and number of points of the signature file currently being processed.
file_name = ''
file_size = 0

# The seven per-point features read from each signature file
# (x, y, time stamp, button status, azimuth, altitude, pressure).
X, Y, TS, BS, AZ, AL, P = [], [], [], [], [], [], []

# Derived per-point attributes: velocity, acceleration and the squared
# deviations of x, y, velocity and acceleration from their means.
V, A = [], []
SDX, SDY, SDV, SDA = [], [], [], []

# Per-file means of x, y, velocity and acceleration.
aX = aY = aV = aA = 0

## Preprocessing the files and evaluating velocity and the per-point squared deviations of X and Y from their means

In [4]:
%%time
# The raw signature files are read relative to the project folder.
os.chdir(r'C:\Users\Mittu\Desktop\Project')
# makedirs(exist_ok=True) rather than mkdir: the cell can be re-run without
# failing with FileExistsError once the output folder exists.
os.makedirs(r'C:\Users\Mittu\Desktop\Project\PreProcessed', exist_ok=True)


def preprocess_signature(file):
    """Clean one signature and add velocity and squared-deviation columns.

    Parameters
    ----------
    file : pandas.DataFrame
        One signature, one row per point, with the numeric columns
        X, Y, TS, BS, AZ, AL and P.

    Returns
    -------
    pandas.DataFrame
        Float columns X, Y, TS, BS, AZ, AL, P, V, SDX, SDY (in that order).
        * Where two consecutive points share a time stamp, the later point's
          X, Y, BS, AZ, AL and P become the average of the pair and its time
          stamp becomes the earlier one plus 1.
        * V[k] is the distance between points k and k+1 divided by their
          time difference and multiplied by the signature's total duration
          (last minus first time stamp); the last point gets V = 0.
        * SDX / SDY are the squared deviations of X / Y from their means.
    """
    feature_names = ['X', 'Y', 'TS', 'BS', 'AZ', 'AL', 'P']
    # Independent float copies: the input frame is never modified, and the
    # averaged values below are not truncated by an integer dtype.
    X, Y, TS, BS, AZ, AL, P = (file[name].values.astype(float)
                               for name in feature_names)
    file_size = len(file)

    # The means are taken before duplicate time stamps are merged, so they
    # describe the points exactly as they were recorded.
    aX = X.sum() / file_size
    aY = Y.sum() / file_size

    # This pass is inherently sequential: a time stamp fixed at step k is
    # the one compared at step k+1.
    # NOTE(review): a run of three or more equal time stamps still leaves a
    # decreasing pair (10, 10, 10 -> 10, 11, 10) and hence a negative
    # velocity; confirm whether `<=` is wanted here.
    for k in range(file_size - 1):
        if TS[k] == TS[k + 1]:
            for channel in (X, Y, BS, AZ, AL, P):
                channel[k + 1] = (channel[k] + channel[k + 1]) / 2
            TS[k + 1] = TS[k] + 1

    # The duration is read once, after all time stamps are final, so every
    # velocity of the signature is normalised by the same value.
    V = np.zeros(file_size)
    if file_size > 1:
        duration = TS[-1] - TS[0]
        distance = np.sqrt(np.diff(X) ** 2 + np.diff(Y) ** 2)
        V[:-1] = (distance * duration) / np.diff(TS)

    data = {'X': X, 'Y': Y, 'TS': TS, 'BS': BS, 'AZ': AZ, 'AL': AL, 'P': P,
            'V': V, 'SDX': (X - aX) ** 2, 'SDY': (Y - aY) ** 2}
    return pd.DataFrame(data, columns=feature_names + ['V', 'SDX', 'SDY'])


for i in range(1, 41):
    for j in range(1, 41):
        file_name = 'U' + str(i) + 'S' + str(j) + '.txt'
        # The first line of a raw file only holds the point count: skip it.
        file = pd.read_csv(file_name, delimiter=' ',
                           names=['X', 'Y', 'TS', 'BS', 'AZ', 'AL', 'P'],
                           header=None, skiprows=1)
        if file.empty:
            raise ValueError(file_name + ' contains no signature points')
        df = preprocess_signature(file)
        # Write through an explicit path instead of switching the working
        # directory back and forth for every file.
        np.savetxt(os.path.join(r'C:\Users\Mittu\Desktop\Project\PreProcessed', file_name),
                   df.values, fmt='%.3f')

Wall time: 3min 25s


## Calculation of Acceleration

In [5]:
%%time
os.chdir(r'C:\Users\Mittu\Desktop\Project\PreProcessed')

# Columns this step reads from each preprocessed file, in file order.
ACCELERATION_INPUT_COLUMNS = ['X', 'Y', 'TS', 'BS', 'AZ', 'AL', 'P', 'V', 'SDX', 'SDY']


def acceleration_from_velocity(V, TS):
    """Return the per-point acceleration for one signature.

    Parameters
    ----------
    V : array-like of float
        Velocity at each point.
    TS : array-like of float
        Time stamp of each point (same length as ``V``).

    Returns
    -------
    numpy.ndarray
        ``A[k] = |V[k+1] - V[k]| * (TS[last] - TS[0]) / (TS[k+1] - TS[k])``
        for every point but the last, whose acceleration is 0.
    """
    V = np.asarray(V, dtype=float)
    TS = np.asarray(TS, dtype=float)
    A = np.zeros(len(V))
    if len(V) > 1:
        A[:-1] = (np.abs(np.diff(V)) * (TS[-1] - TS[0])) / np.diff(TS)
    return A


for i in range(1, 41):
    for j in range(1, 41):
        file_name = 'U' + str(i) + 'S' + str(j) + '.txt'
        # usecols pins the read to the leading input columns.  The file is
        # overwritten below with an extra column; without usecols a re-run
        # would see more columns than names and pandas would silently turn
        # the first column into the index, shifting every name by one.
        file = pd.read_csv(file_name, delimiter=' ',
                           names=ACCELERATION_INPUT_COLUMNS, header=None,
                           usecols=list(range(len(ACCELERATION_INPUT_COLUMNS))))
        # The result is built per file instead of being appended to a
        # notebook-level list, so the cell does not depend on kernel state.
        df = file.assign(A=acceleration_from_velocity(file['V'].values,
                                                      file['TS'].values))
        np.savetxt(file_name, df.values, fmt='%.3f')

Wall time: 1min 34s


## Calculation of the per-point squared deviations of velocity and acceleration from their means

In [6]:
%%time
os.chdir(r'C:\Users\Mittu\Desktop\Project\PreProcessed')

# Columns this step reads from each file, in file order.
DEVIATION_INPUT_COLUMNS = ['X', 'Y', 'TS', 'BS', 'AZ', 'AL', 'P', 'V', 'SDX', 'SDY', 'A']


def squared_deviation(values):
    """Return ``(values - mean(values)) ** 2`` element-wise.

    Parameters
    ----------
    values : array-like of float
        One numeric column of a signature.

    Returns
    -------
    numpy.ndarray
        Squared distance of every element from the column mean.
    """
    values = np.asarray(values, dtype=float)
    return (values - values.mean()) ** 2


for i in range(1, 41):
    for j in range(1, 41):
        file_name = 'U' + str(i) + 'S' + str(j) + '.txt'
        # usecols pins the read to the leading input columns.  The file is
        # overwritten below with two extra columns; without usecols a re-run
        # would see more columns than names and pandas would silently turn
        # the leading columns into the index, shifting every name.
        file = pd.read_csv(file_name, delimiter=' ',
                           names=DEVIATION_INPUT_COLUMNS, header=None,
                           usecols=list(range(len(DEVIATION_INPUT_COLUMNS))))
        # Nothing is accumulated in, or rebound on, notebook-level names
        # (in particular the list `A` is left alone), so this cell and the
        # earlier ones can be re-run in any order.
        df = file.assign(SDV=squared_deviation(file['V'].values),
                         SDA=squared_deviation(file['A'].values))
        # Select the columns explicitly so the written order does not depend
        # on how assign() orders keyword arguments.
        df = df[DEVIATION_INPUT_COLUMNS + ['SDV', 'SDA']]
        np.savetxt(file_name, df.values, fmt='%.3f')

Wall time: 1min 1s
