In [5]:
# This small sample data set contains signature data collected from
# five users.  For each user, there are 20 genuine signatures and
# 20 skilled forgeries.

# Each genuine/forgery signature is stored in a separate text file.
# The file names are in the format "USERx_y.txt", where x (1..5)
# indicates the user and y (1..40) indicates one signature instance
# of the corresponding user, with the first 20 (1..20) representing
# genuine signatures and the rest (21..40) representing skilled
# forgeries provided by the other users.

# In each text file, the signature is simply represented as a
# sequence of points.  The first line stores a single integer which
# is the total number of points in the signature.  Each of the
# subsequent lines corresponds to one point characterized by seven
# features listed in the following order:

#   X-coordinate  - scaled cursor position along the x-axis
#   Y-coordinate  - scaled cursor position along the y-axis
#   Time stamp    - system time at which the event was posted
#   Button status - current button status (0 for pen-up and
#                   1 for pen-down)
#   Azimuth       - clockwise rotation of cursor about the z-axis
#   Altitude      - angle upward toward the positive z-axis
#   Pressure      - adjusted state of the normal pressure

In [1]:
import pandas as pd
import numpy as np

In [2]:
file_name = ''

In [3]:
list_df = []
np.array(list_df)
list_size = []
np.array(list_size)

array([], dtype=float64)

## COMBINING ALL THE FILES INTO ONE LIST

In [4]:
count = 0
for i in range(1,41):
    for j in range(1, 41):
        file_name = 'U'+str(i)+'S'+str(j)+'.txt'
                
        # Reading into a dataframe and appending it to a list
        df = pd.read_csv(file_name, delimiter=' ', names=['X', 'Y', 'TS', 'T', 'AZ', 'AL', 'P'], header=None, skiprows=1)
        list_df.append(df)
        
        # Creating list_size of storing number of rows in each file
        rows, cols = df.shape
        list_size.append(rows)

## CONVERTING INTO A LARGE DATAFRAME

##### ARRAY FORMAT

In [5]:
df_array = np.vstack(list_df)

##### CREATING A DATAFRAME

In [6]:
final_dataset = pd.DataFrame(df_array)
final_dataset.columns = ['X', 'Y', 'TS', 'T', 'AZ', 'AL', 'P']
final_dataset.head()

Unnamed: 0,X,Y,TS,T,AZ,AL,P
0,2933,5678,31275775,0,1550,710,439
1,2933,5678,31275785,1,1480,770,420
2,3001,5851,31275795,1,1350,830,433
3,3114,6139,31275805,1,1350,800,422
4,3217,6198,31275815,1,1350,800,415


## ATTRIBUTES CALCULATION

In [7]:
# feature list

# dynamic
velocity_array = []
x_velocity_array = []
y_velocity_array = []
avg_pressure = []

# static
avg_x = []
avg_y = []

In [8]:
distance = 0
index = 0
count = 0
x_distance = 0
y_distance = 0
pressure = 0

temp = 0

In [9]:
for x in list_size:
    for i in range(x):
        j = i-1
        if j >= 0:
            distance += ((final_dataset['X'][count+j] - final_dataset['X'][count+i])**2 + (final_dataset['Y'][count+j] - final_dataset['Y'][count+i])**2)**0.5
            
            x_distance += abs(final_dataset['X'][count+j] - final_dataset['X'][count+i])
            
            y_distance += abs(final_dataset['Y'][count+j] - final_dataset['Y'][count+i])
            
            time = final_dataset['TS'][count+x-1] - final_dataset['TS'][count]
            
            pressure += final_dataset['P'][count+i]
    count += x
    
    
    # dynamic
    # velocity calculation
    velocity_array.append(float(distance*1000/time))
    
    # x_velocity calculation
    x_velocity_array.append(float(x_distance*1000/time))
    
    # y_velocity calculation
    y_velocity_array.append(float(y_distance*1000/time))
    
    # avg_pressure calculation
    avg_pressure.append(float(pressure/x))
    
    
    # static
    # avg_x calculation
    avg_x.append(x_distance/x)

    # avg_y calculation
    avg_y.append(y_distance/x)
    
    
    
    
    
    # initialization
    distance = 0
    x_distance = 0
    y_distance = 0
    time = 0
    pressure = 0

In [10]:
print('DYNAMIC FEATURES: ')

print('velocity     :', velocity_array[:5])

print('x_velocity   :', x_velocity_array[:5])

print('y_velocity   :', y_velocity_array[:5])

print('avg_pressure :', avg_pressure[:5])


print('STATIC FEATURES: ')

print('avg_x :', avg_x[:5])

print('avg_y :', avg_y[:5])

DYNAMIC FEATURES: 
velocity     : [22203.433517655238, 22930.26429846866, 24527.287240196525, 23457.56298187479, 21381.860569602373]
x_velocity   : [11205.445544554455, 11497.48322147651, 12736.133122028526, 12059.068219633944, 11249.260355029586]
y_velocity   : [16169.14191419142, 16750.0, 17482.56735340729, 16648.086522462563, 15363.905325443788]
avg_pressure : [623.2380952380952, 594.5411764705882, 583.3111111111111, 603.8414634146342, 537.0860215053764]
STATIC FEATURES: 
avg_x : [161.67857142857142, 161.23529411764707, 178.5888888888889, 176.76829268292684, 163.53763440860214]
avg_y : [233.29761904761904, 234.89411764705883, 245.14444444444445, 244.03658536585365, 223.3548387096774]


## COMBINE IT INTO A FEATURE VECTOR

In [19]:
fv_dictionary = {'velocity':velocity_array, 'x_velocity':x_velocity_array, 'y_velocity':y_velocity_array, 'avg_pressure':avg_pressure,
                 'avg_x':avg_x, 'avg_y':avg_y}

In [20]:
feature_vector = pd.DataFrame(fv_dictionary)

In [21]:
feature_vector.head()

Unnamed: 0,velocity,x_velocity,y_velocity,avg_pressure,avg_x,avg_y
0,22203.433518,11205.445545,16169.141914,623.238095,161.678571,233.297619
1,22930.264298,11497.483221,16750.0,594.541176,161.235294,234.894118
2,24527.28724,12736.133122,17482.567353,583.311111,178.588889,245.144444
3,23457.562982,12059.06822,16648.086522,603.841463,176.768293,244.036585
4,21381.86057,11249.260355,15363.905325,537.086022,163.537634,223.354839


## NORMALIZE THE FEATURE VECTOR