# <U>APPENDING TO A NATIVE PYTHON LIST IS FASTER THAN APPENDING TO A NUMPY ARRAY</U>

In [1]:
# This small sample data set contains signature data collected from
# five users.  For each user, there are 20 genuine signatures and
# 20 skilled forgeries.

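# Each genuine/forgery signature is stored in a separate text file.
# The file names are in the format "USERx_y.txt", where x (1..5)
# indicates the user and y (1..40) indicates one signature instance
# of the corresponding user, with the first 20 (1..20) representing
# genuine signatures and the rest (21..40) representing skilled
# forgeries provided by the other users.
#
# NOTE: the loading cell in this notebook builds names of the form
# "U<x>S<y>.txt" with x and y both running over 1..40, so it appears to
# expect a renamed, 40-user copy of the data rather than the five-user
# sample described above -- confirm against the files on disk.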

# In each text file, the signature is simply represented as a
# sequence of points.  The first line stores a single integer which
# is the total number of points in the signature.  Each of the
# subsequent lines corresponds to one point characterized by seven
# features listed in the following order:

#   X-coordinate  - scaled cursor position along the x-axis
#   Y-coordinate  - scaled cursor position along the y-axis
#   Time stamp    - system time at which the event was posted
#   Button status - current button status (0 for pen-up and
#                   1 for pen-down)
#   Azimuth       - clockwise rotation of cursor about the z-axis
#   Altitude      - angle upward toward the positive z-axis
#   Pressure      - adjusted state of the normal pressure

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Placeholder for the path of the signature file being read; the loading
# loop overwrites it on every iteration.
file_name = ""

In [4]:
# One DataFrame per signature file, in the order the files are read.
list_df = list()
# Number of rows of each DataFrame in list_df (same order, same length).
list_size = list()

## COMBINING ALL THE FILES INTO ONE LIST

In [5]:
%%time
count = 0

# Visit every (user, sample) pair; both indices run from 1 to 40 inclusive.
for user_id in range(1, 41):
    for sample_id in range(1, 41):
        file_name = f'U{user_id}S{sample_id}.txt'

        # Parse the file as space-separated values with seven named columns.
        # The first line of the file is skipped (skiprows=1) and no header
        # row is read from the file itself.
        df = pd.read_csv(
            file_name,
            sep=' ',
            header=None,
            names=['X', 'Y', 'TS', 'T', 'AZ', 'AL', 'P'],
            skiprows=1,
        )
        list_df.append(df)

        # Remember how many rows this file contributed so the concatenated
        # data can be split back into individual signatures later.
        list_size.append(df.shape[0])

Wall time: 3.23 s


## CONVERTING INTO A LARGE DATAFRAME

##### ARRAY FORMAT

In [9]:
# Stack the rows of every per-file frame, in read order, into one 2-D array.
df_array = np.vstack([frame.values for frame in list_df])

##### CREATING A DATAFRAME

In [10]:
# Wrap the stacked array in a DataFrame, labelling the seven columns up front.
final_dataset = pd.DataFrame(
    df_array,
    columns=['X', 'Y', 'TS', 'T', 'AZ', 'AL', 'P'],
)
# Last expression of the cell: shows the first rows as the cell output.
final_dataset.head()

Unnamed: 0,X,Y,TS,T,AZ,AL,P
0,2933,5678,31275775,0,1550,710,439
1,2933,5678,31275785,1,1480,770,420
2,3001,5851,31275795,1,1350,830,433
3,3114,6139,31275805,1,1350,800,422
4,3217,6198,31275815,1,1350,800,415


## ATTRIBUTES CALCULATION

In [11]:
# feature list
#
# One entry per signature is collected in each of these containers.
# They are plain Python lists rather than np.array([]): a numpy.ndarray has
# no .append() method, whereas a list grows in place with list.append().

# dynamic
velocity_array = []
x_velocity_array = []
y_velocity_array = []
avg_pressure = []

# static
avg_x = []
avg_y = []

In [12]:
# Per-signature running totals used by the feature loop.
distance = x_distance = y_distance = 0
pressure = 0

# `count` is the row offset of the current signature inside final_dataset.
count = 0

# Spare scratch variables.
index = 0
temp = 0

In [None]:
%%time

def _signature_features(points):
    """Compute the six summary features of a single signature.

    Parameters
    ----------
    points : array-like
        2-D data with one row per sampled point and the columns
        X, Y, TS, P in that order.

    Returns
    -------
    tuple of float
        ``(velocity, x_velocity, y_velocity, avg_pressure, avg_x, avg_y)``.
        All six are NaN for a signature with no points.  The three
        velocities are NaN when the first and last timestamps are equal
        (which includes single-point signatures), instead of dividing by
        zero.
    """
    n_points = len(points)
    if n_points == 0:
        return (float('nan'),) * 6

    xs, ys, ts, ps = np.asarray(points, dtype=float).T

    # Displacement between each pair of consecutive points.
    dx = np.diff(xs)
    dy = np.diff(ys)
    distance = np.hypot(dx, dy).sum()   # total path length
    x_distance = np.abs(dx).sum()       # total absolute travel along x
    y_distance = np.abs(dy).sum()       # total absolute travel along y

    # Elapsed time between the first and the last sample of the signature.
    duration = ts[-1] - ts[0]
    if duration != 0:
        # The factor 1000 is kept from the original formula; presumably it
        # converts a millisecond time stamp to seconds -- TODO confirm units.
        velocity = distance * 1000 / duration
        x_velocity = x_distance * 1000 / duration
        y_velocity = y_distance * 1000 / duration
    else:
        velocity = x_velocity = y_velocity = float('nan')

    return (
        float(velocity),
        float(x_velocity),
        float(y_velocity),
        # Mean over *all* samples (the first sample must not be left out of
        # the sum when the divisor is the full point count).
        float(ps.mean()),
        # NOTE(review): as in the original formula, avg_x / avg_y are the
        # total absolute x / y travel divided by the number of points, not
        # the mean coordinate -- confirm this is the intended feature.
        float(x_distance / n_points),
        float(y_distance / n_points),
    )


# Pull the four columns that are needed out of the DataFrame once; indexing
# the DataFrame element by element inside a Python loop is extremely slow.
_signal = np.asarray(final_dataset[['X', 'Y', 'TS', 'P']])

# list_size holds the number of rows each signature occupies, so consecutive
# slices of that length walk through the signatures in order.
_features = []
_start = 0
for _n_points in list_size:
    _stop = _start + _n_points
    _features.append(_signature_features(_signal[_start:_stop]))
    _start = _stop

# Transpose "one tuple per signature" into "one list per feature".  The names
# are rebound to fresh lists, so the cell can be re-run without duplicating
# entries and does not depend on how the containers were first created.
if _features:
    _columns = [list(column) for column in zip(*_features)]
else:
    _columns = [[] for _ in range(6)]

# dynamic: velocity_array, x_velocity_array, y_velocity_array, avg_pressure
# static : avg_x, avg_y
(velocity_array, x_velocity_array, y_velocity_array,
 avg_pressure, avg_x, avg_y) = _columns

In [None]:
# Show the first five values of every feature, grouped by feature family.
print('DYNAMIC FEATURES: ')
for label, values in (
    ('velocity     :', velocity_array),
    ('x_velocity   :', x_velocity_array),
    ('y_velocity   :', y_velocity_array),
    ('avg_pressure :', avg_pressure),
):
    print(label, values[:5])

print('STATIC FEATURES: ')
for label, values in (
    ('avg_x :', avg_x),
    ('avg_y :', avg_y),
):
    print(label, values[:5])

## COMBINE IT INTO A FEATURE VECTOR

In [None]:
# Map each feature name to its per-signature values; the insertion order
# here becomes the column order of the feature-vector DataFrame.
fv_dictionary = dict(
    velocity=velocity_array,
    x_velocity=x_velocity_array,
    y_velocity=y_velocity_array,
    avg_pressure=avg_pressure,
    avg_x=avg_x,
    avg_y=avg_y,
)

In [None]:
# One column per feature (dictionary key), one row per signature.
feature_vector = pd.DataFrame(data=fv_dictionary)

In [None]:
# Cell output: the first five rows of the feature vector.
feature_vector.head(n=5)

## NORMALIZE THE FEATURE VECTOR