<a href="https://colab.research.google.com/github/mohammadreza-mohammadi94/Deep-Learning-Projects/blob/main/Typing%20Pattern%20Recognition%20(LSTM)/Typing_Pattern_Recognition_(RNN).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Frameworks & Setup Enviorment

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(
    format=("%(asctime)s - %(levelname)s - %(message)s"),
    level=logging.INFO,
    handlers=[
        logging.FileHandler('app_logs.log'),
        logging.StreamHandler()
    ]
)

# Load Dataset

In [10]:
def load_keystroke_dataset(file_path):
    df = pd.read_csv(file_path)

    # Chose features
    feature_columns = [col for col in df.columns if col.startswith(('H.', 'DD.', 'UD.'))]
    df_features = df[feature_columns]

    # Normalization Data
    df_features = (df_features - df_features.min()) / (df_features.max() - df_features.min())

    # Creating sequences for each user
    sequences = []
    user_groups = df.groupby('subject')

    for user_id, group in user_groups:
        seq = group[feature_columns].values
        sequences.append(seq)

    print("Number of Users: ", len(user_groups))
    print("Number of Features: ", len(feature_columns))
    return sequences, feature_columns


# Prepare Data

In [11]:
def prepare_data(sequences, max_len=20):
    X, y = [], []
    for seq in sequences:
        for i in range(len(seq) - 1):
            X.append(seq[max(0, i - max_len + 1): i + 1])
            y.append(seq[i + 1])
    X = pad_sequences(X, maxlen=max_len, padding='pre', dtype='float32')
    y = np.array(y, dtype='float32')

    print(f"Number of Sequences: {len(X)}")
    print(f"Input Dimension: {X.shape}")
    print(f"Output Dimension: {y.shape}")
    return X, y

# Create the Model

In [12]:
def build_model(input_shape, output_dim):
    inputs = Input(shape=input_shape)
    x = LSTM(64, return_sequences=False)(inputs)
    x = Dense(32, activation='relu')(x)
    outputs = Dense(output_dim, activation='linear')(x)

    model = Model(inputs, outputs)
    model.compile(optimizer = Adam(learning_rate=0.01),
                  loss='mse',
                  metrics=['mae'])
    return model

# Prediction Method

In [16]:
def predict_pattern(model, sequence, max_len):
    padded_sequence = pad_sequences([sequence[-max_len:]], maxlen=max_len, padding='pre', dtype='float32')
    prediction = model.predict(padded_sequence, verbose=0)
    return prediction[0]

# Run And Train the Model

In [13]:
file_path = '/content/DSL-StrongPasswordData.csv'
# Load & preprocess
sequences, feature_columns = load_keystroke_dataset(file_path)

# data preparation
max_len = 20
X, y = prepare_data(sequences, max_len)

# input output dimension
input_shape = (max_len, X.shape[2])
output_dim = X.shape[2]

# build and train the model
model = build_model(input_shape, output_dim)
model.summary()
# train the model
history = model.fit(X, y, batch_size=32, epochs=50, validation_split=0.2, verbose=1)

# saving the model
model.save('keystroke_pattern_model.h5')

Number of Users:  51
Number of Features:  31
Number of Sequences: 20349
Input Dimension: (20349, 20, 31)
Output Dimension: (20349, 31)


Epoch 1/50
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - loss: 0.0172 - mae: 0.0663 - val_loss: 0.0286 - val_mae: 0.0575
Epoch 2/50
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - loss: 0.0133 - mae: 0.0533 - val_loss: 0.0285 - val_mae: 0.0544
Epoch 3/50
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13ms/step - loss: 0.0131 - mae: 0.0523 - val_loss: 0.0272 - val_mae: 0.0507
Epoch 4/50
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - loss: 0.0151 - mae: 0.0514 - val_loss: 0.0274 - val_mae: 0.0525
Epoch 5/50
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - loss: 0.0134 - mae: 0.0505 - val_loss: 0.0275 - val_mae: 0.0516
Epoch 6/50
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 11ms/step - loss: 0.0124 - mae: 0.0504 - val_loss: 0.0272 - val_mae: 0.0526
Epoch 7/50
[1m509/509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 13



# Predictions

In [18]:
# Predictions
sample_sequence = sequences[0][:19]
predicted_pattern = predict_pattern(model, sample_sequence, max_len)
print(f"Sample Sequences (Last 5): {sample_sequence[-5:]}")
print(f"Predicted Pattern: {predicted_pattern}")

Sample Sequences (Last 5): [[ 0.1169  0.2562  0.1393  0.0739  0.1549  0.081   0.0892  0.1462  0.057
   0.0966  1.3501  1.2535  0.0826  1.0669  0.9843  0.1291  0.6546  0.5255
   0.1317  0.2112  0.0795  0.1434  0.1083 -0.0351  0.0869  0.2072  0.1203
   0.1027  1.1307  1.028   0.1301]
 [ 0.127   0.1839  0.0569  0.0911  0.1381  0.047   0.0895  0.1774  0.0879
   0.0739  0.6069  0.533   0.0781  0.8047  0.7266  0.1305  0.202   0.0715
   0.1204  0.1746  0.0542  0.1338  0.1521  0.0183  0.0774  0.1954  0.118
   0.0942  0.2643  0.1701  0.0631]
 [ 0.1016  0.1799  0.0783  0.0792  0.1434  0.0642  0.076   0.1412  0.0652
   0.0837  0.8381  0.7544  0.1159  0.8525  0.7366  0.1154  0.3701  0.2547
   0.1     0.1531  0.0531  0.164   0.1186 -0.0454  0.0914  0.1954  0.104
   0.1053  0.2385  0.1332  0.0771]
 [ 0.1056  0.1755  0.0699  0.0781  0.1391  0.061   0.0898  0.1613  0.0715
   0.0826  0.77    0.6874  0.0718  0.6947  0.6229  0.131   0.486   0.355
   0.0692  0.1609  0.0917  0.1262  0.0697 -0.0565  0.0772 