In [None]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
import os

# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [None]:
# FOR NORMALIZING FEATURE VECTOR
# Names of the columns of Features.csv that are scaled and used as model inputs.
# Kept in one list so the selection and any re-labelling stay in sync.
FEATURE_COLUMNS = [
    'avgX', 'avgY', 'avgSDX', 'avgSDY', 'avgV', 'avgA', 'avgSDV', 'avgSDA', 'pen_down', 'pen_up',
    'pen_ratio', 'sign_width', 'sign_height', 'width_height_ratio', 'total_sign_duration',
    'range_pressure', 'max_pressure', 'sample_points', 'sample_points_to_width', 'mean_pressure',
    'pressure_variance', 'avg_x_velocity', 'avg_y_velocity', 'max_x_velocity', 'max_y_velocity',
    'samples_positive_x_velocity', 'samples_positive_y_velocity', 'variance_x_velocity',
    'variance_y_velocity', 'std_x_velocity', 'std_y_velocity', 'median_x_velocity',
    'median_y_velocity', 'corr_x_y_velocity', 'mean_x_acceleration', 'mean_y_acceleration',
    'corr_x_y_acceleration', 'variance_x_acceleration', 'variance_y_acceleration',
    'std_x_acceleration', 'std_y_acceleration', 'x_local_minima', 'y_local_minima',
]

feature_vector_main = pd.read_csv('../input/svc2004-16features/Features.csv')
feature_vector = feature_vector_main[FEATURE_COLUMNS]

# Alternative scaling (disabled): robust scaling instead of min-max.
# feature_vector = pd.DataFrame(RobustScaler().fit_transform(feature_vector), columns=FEATURE_COLUMNS)

# Min-max scale every feature column to [0, 1].
# A column whose values are all identical has max == min; dividing by that zero
# range would turn the whole column into NaN, so a zero range is replaced by 1,
# which maps such a column to all zeros instead.
feature_min = feature_vector.min()
feature_range = (feature_vector.max() - feature_min).replace(0, 1)
feature_vector = (feature_vector - feature_min) / feature_range
# NOTE(review): min/max are taken over the whole file, i.e. before any
# train/validation split, so validation rows influence the scaling.

# Re-attach the two non-feature columns. IDs are shifted down by one
# (presumably 1-based in the CSV and needed 0-based as class labels - TODO confirm).
feature_vector['ID'] = feature_vector_main['ID'] - 1
feature_vector['F'] = feature_vector_main['F']

# Write the normalized table to disk, then reload it from that file.
feature_vector.to_csv('features_normalized.csv', index=False)

feature_vector = pd.read_csv('features_normalized.csv')

In [None]:
# Experiment configuration.
split_percentage = 0.1  # fraction of rows passed as test_size to train_test_split
num_of_signatures = 40  # number of classes
print('The number of signatures are:', num_of_signatures)

In [None]:
# SHUFFLING DATASET
# TOTAL DATASET THAT IS TRAINED OR TESTED
# Keep the first rows_per_signature * num_of_signatures rows and shuffle them.
# The shuffle is seeded so that Restart & Run All yields the same row order.
rows_per_signature = 40  # NOTE(review): presumably the number of samples per signer - confirm
total_df = (
    feature_vector[:(rows_per_signature * num_of_signatures)]
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [None]:
# Split the shuffled table into model inputs (X) and class labels (y).
# The 'ID' and 'F' columns are removed from the inputs; 'ID' is the label.
X = total_df.drop(columns=['ID', 'F'])
y = total_df['ID']
print(X.shape, y.shape)

In [None]:
# Hold out split_percentage of the rows for validation.
# random_state pins the partition so the validation set (and therefore the
# reported validation metrics) is the same on every run.
x_train, x_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=split_percentage,
    shuffle=True,
    random_state=42,
)
print(x_train.shape, x_val.shape, y_train.shape, y_val.shape)
x_train.head()

In [None]:
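# # TRAIN TEST SPLIT (disabled: manual 80/20 slice using only 16 of the features; superseded by the train_test_split cell above)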
# train_data_count = int(0.8 * (40 * num_of_signatures))
# train_df = total_df[:train_data_count]
# test_df = total_df[train_data_count:(40 * num_of_signatures)]

# x_train = train_df[
#     ['avgX', 'avgY', 'avgSDX', 'avgSDY', 'avgV', 'avgA', 'avgSDV', 'avgSDA', 'pen_down', 'pen_up', 'pen_ratio',
#      'sign_width', 'sign_height', 'width_height_ratio', 'total_sign_duration', 'range_pressure']]
# y_train = train_df['ID']

# x_test = test_df[
#     ['avgX', 'avgY', 'avgSDX', 'avgSDY', 'avgV', 'avgA', 'avgSDV', 'avgSDA', 'pen_down', 'pen_up', 'pen_ratio',
#      'sign_width', 'sign_height', 'width_height_ratio', 'total_sign_duration', 'range_pressure']]
# y_test = test_df['ID']

In [None]:
# NEURAL NETWORK 1
# Fully connected classifier: one input per feature column of x_train and one
# softmax output per signature class.
model = keras.Sequential([
    # The first layer needs a non-linearity too: without an activation it is a
    # purely linear map that collapses into the following Dense layer, adding
    # parameters but no modelling capacity.
    keras.layers.Dense(6144, activation='relu', input_shape=[x_train.shape[1]]),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dense(2048, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(2048, activation='relu'),
    keras.layers.Dense(1024, activation='relu'),
    keras.layers.Dense(1024, activation='relu'),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(num_of_signatures, activation='softmax')
])

model.summary()

# optimizers
# Only rms_prop is passed to compile() below; the others are alternative
# configurations left available for experimentation.
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
rms_prop = keras.optimizers.RMSprop(learning_rate=0.0001, rho=0.9)
ada_grad = keras.optimizers.Adagrad(learning_rate=0.0001, initial_accumulator_value=0.1, epsilon=1e-07)
adam = keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)
ada_max = keras.optimizers.Adamax(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-07)

# Sparse categorical cross-entropy: the targets are integer class ids, not one-hot vectors.
model.compile(optimizer=rms_prop, loss=keras.losses.sparse_categorical_crossentropy, metrics=['accuracy'])

In [None]:
# Multiply the learning rate by 0.25 whenever validation accuracy has not
# improved for 7 epochs, never going below 1e-6.
learning_rate_reduction = keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.25,
    patience=7,
    min_lr=1e-6,
    verbose=1,
)

# Stop training once validation loss has not improved for 3 epochs; the weights
# of the last epoch are kept (restore_best_weights=False).
# NOTE(review): this callback is defined here but is not in the callbacks list
# of the model.fit call in this notebook.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=3,
    mode='auto',
    restore_best_weights=False,
    verbose=1,
)

In [None]:
# Train for 150 epochs, evaluating on the validation split after each epoch.
# Only the learning-rate scheduler callback is active.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=150,
    callbacks=[learning_rate_reduction],
)

In [None]:
# Accuracy per epoch: training curve first, validation curve second
# (same order as the legend entries).
fig, ax = plt.subplots()
for series_key in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[series_key])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Loss per epoch: training curve first, validation curve second
# (same order as the legend entries).
fig, ax = plt.subplots()
for series_key in ('loss', 'val_loss'):
    ax.plot(history.history[series_key])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# # testing using the test dataset
# _, test_accuracy_1 = model.evaluate(x_val, y_val)
# print('The accuracy of model 1:', test_accuracy_1)

In [None]:
# model.save('model_val_9875_rms_8.h5')

In [None]:
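# # neural network testing
# # NOTE(review): the hard-coded sample below contains values outside [0, 1] (negative and > 1),
# # so it was presumably not produced by the min-max normalization used in this notebook -
# # scale any input the same way the loaded model's training data was scaled before predicting.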
# model = keras.models.load_model('model_val_96.h5')

# # user 0
# user = np.argmax(model.predict([[-0.6125257646193386,0.4151867824260709,-0.0011056291780491628,0.9474710991886862,-0.11563571586919123,-0.43863590776697775,-0.40311886181555184,-0.01804716430842045,-0.8588235294117647,0.25,-0.5027769083957752,0.07720717551881814,0.39905120692060836,-0.2795311374734562,-0.6834170854271356,0.35728952772073924,0.21643286573146292,-0.8571428571428571,-0.6919285800201466,0.1833992831665563,0.5235969059847934,2.0692303345957384,-0.8018494583335899,0.6692708333333331,-0.2414634146341463,-0.9148936170212766,-0.7647058823529411,1.6312799041308752,-0.10265984780406812,1.2035995901238192,-0.10678580469224899,0.846153846153846,-0.09090909090909088,0.09308467022142258,1.6405392730640083,-0.4681958418004259,0.010188306408870506,0.3505552272836884,-0.10998369566848107,0.35469646965234075,-0.17367654762298204,0.25,-0.6666666666666666]]))
# print('result:', user)

######################################################################################################