<a href="https://colab.research.google.com/github/mohamed-ben-lboukht/keystroke/blob/main/keystroke_pass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import necessary libraries for machine learning, data preprocessing, and evaluation metrics.



In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# Load the preprocessed dataset containing keystroke dynamics features.
# This dataset includes extracted features based on users' typing behavior.



In [None]:
# Load the keystroke-dynamics table and drop the columns that are not used
# as features or targets.
df = pd.read_csv('GREYCNISLAB_NEW4.csv')
df = df.drop(columns=["User_ID", "Password"])

# Encode the categorical targets as integers. The fitted encoders are kept so
# predictions can be mapped back to the original labels later.
label_encoders = {}
for col in ["Gender", "Handedness"]:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# Features are selected by position: everything from the fourth remaining
# column onwards.
# NOTE(review): this assumes the first three remaining columns are exactly the
# targets (Age, Gender, Handedness) -- confirm against the CSV header.
X_raw = df.iloc[:, 3:].values
y_age = df["Age"].values
y_gender = df["Gender"].values
y_handedness = df["Handedness"].values

# Split once so features and all three targets share the same row partition.
(
    X_train_raw,
    X_test_raw,
    y_age_train,
    y_age_test,
    y_gender_train,
    y_gender_test,
    y_handedness_train,
    y_handedness_test,
) = train_test_split(
    X_raw, y_age, y_gender, y_handedness, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, so that test-set statistics do not
# leak into training, then apply the same transform everywhere.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# `X` stays available as the fully scaled matrix for any later code that
# re-splits it with the same seed.
X = scaler.transform(X_raw)




# Defining a neural network model with a shared architecture.
# The model has an input layer and two shared hidden layers across different tasks.
# It predicts three outputs: age (regression), gender (binary classification), and handedness (binary classification).



In [None]:

# Multi-task network: one shared trunk feeding three task-specific heads.
input_layer = Input(shape=(X_train.shape[1],))

# Shared trunk: two fully connected ReLU layers (128 -> 64 units).
shared = input_layer
for hidden_units in (128, 64):
    shared = Dense(hidden_units, activation="relu")(shared)

# Task heads: linear unit for age regression, sigmoid units for the two
# binary classification targets.
age_output = Dense(1, activation="linear", name="age")(shared)
gender_output = Dense(1, activation="sigmoid", name="gender")(shared)
handedness_output = Dense(1, activation="sigmoid", name="handedness")(shared)

# Model
model = Model(
    inputs=input_layer,
    outputs=[age_output, gender_output, handedness_output],
)

# Per-head losses and metrics are keyed by the head layer names; the total
# loss is the weighted sum given by `loss_weights`.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=dict(
        age="mse",
        gender="binary_crossentropy",
        handedness="binary_crossentropy",
    ),
    loss_weights=dict(age=1.0, gender=0.5, handedness=0.5),
    metrics=dict(age="mae", gender="accuracy", handedness="accuracy"),
)

model.summary()

# Training the model for 50 epochs with a batch size of 32.
# The held-out test split is passed as the validation set to monitor performance after each epoch.
# Labels are provided as a dictionary corresponding to the multiple outputs.



In [None]:
# Train all three heads jointly. Targets are keyed by the output layer names
# ("age", "gender", "handedness"); the model has no other outputs, so no other
# keys may be supplied. The test split is reused as validation data.
history = model.fit(
    X_train,
    {"age": y_age_train, "gender": y_gender_train, "handedness": y_handedness_train},
    validation_data=(
        X_test,
        {"age": y_age_test, "gender": y_gender_test, "handedness": y_handedness_test},
    ),
    epochs=50,
    batch_size=32,
)

Epoch 1/50
55/55 ━━━━━━━━━━━━━━━━━━━━ 1s 14ms/step - age_loss: 7.3667 - age_mae: 1.9476 - gender_accuracy: 0.7487 - gender_loss: 0.5201 - handedness_accuracy: 0.9248 - handedness_loss: 0.1987 - loss: 7.7261 - val_age_loss: 106.4424 - val_age_mae: 7.7859 - val_gender_accuracy: 0.7318 - val_gender_loss: 0.5918 - val_handedness_accuracy: 0.9091 - val_handedness_loss: 0.2616 - val_loss: 107.0253
Epoch 2/50
55/55 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - age_loss: 8.7144 - age_mae: 1.9812 - gender_accuracy: 0.7282 - gender_loss: 0.5259 - handedness_accuracy: 0.9278 - handedness_loss: 0.1977 - loss: 9.0763 - val_age_loss: 110.3216 - val_age_mae: 7.8878 - val_gender_accuracy: 0.7250 - val_gender_loss: 0.5939 - val_handedness_accuracy: 0.9023 - val_handedness_loss: 0.2514 - val_loss: 111.0038
Epoch 3/50
55/55 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - age_loss: 7.4611 - age_mae: 1.8597 - gender_accuracy: 0.7 … (output truncated)

# Evaluating the model on the test dataset.
# Retrieving loss values and accuracy metrics for each output.



In [None]:

# Evaluate on the test split. `return_dict=True` gives metrics keyed by name,
# so the reported values do not depend on the positional order (or number) of
# entries that `evaluate` returns in a given Keras version.
eval_results = model.evaluate(
    X_test,
    {"age": y_age_test, "gender": y_gender_test, "handedness": y_handedness_test},
    return_dict=True,
)

# Total loss and the three headline metrics are required.
loss = eval_results["loss"]
age_mae = eval_results["age_mae"]
gender_acc = eval_results["gender_accuracy"]
handedness_acc = eval_results["handedness_accuracy"]

# Per-head losses are kept for callers that used the old tuple unpacking;
# they are None if this Keras version does not report them separately.
age_loss = eval_results.get("age_loss")
gender_loss = eval_results.get("gender_loss")
handedness_loss = eval_results.get("handedness_loss")

print(f"MAE pour l'âge : {age_mae}")
print(f"Précision pour le genre : {gender_acc}")
print(f"Précision pour handedness : {handedness_acc}")


14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - age_loss: 111.7044 - age_mae: 8.0133 - gender_accuracy: 0.7217 - gender_loss: 0.6029 - handedness_accuracy: 0.9010 - handedness_loss: 0.2286 - loss: 112.1553
MAE pour l'âge : 7.882318496704102
Précision pour le genre : 0.7272727489471436
Précision pour handedness : 0.9068182110786438


# Adding Dropout layers to prevent overfitting.
# This new version of the model includes regularization layers to improve generalization.


In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout

# Define the input
input_layer = Input(shape=(X_train.shape[1],), name="input")

# Shared trunk: 256 -> 128 -> 64 ReLU units, with 20% dropout applied before
# each of the last two dense layers.
shared = Dense(256, activation="relu")(input_layer)
for hidden_units in (128, 64):
    shared = Dropout(0.2)(shared)
    shared = Dense(hidden_units, activation="relu")(shared)

# Task heads: linear unit for age regression, sigmoid units for the two
# binary classification targets.
age_output = Dense(1, activation="linear", name="age")(shared)
gender_output = Dense(1, activation="sigmoid", name="gender")(shared)
handedness_output = Dense(1, activation="sigmoid", name="handedness")(shared)

model = Model(
    inputs=input_layer,
    outputs=[age_output, gender_output, handedness_output],
)

# Per-head losses and metrics are keyed by the head layer names; the total
# loss is the weighted sum given by `loss_weights`.
model.compile(
    optimizer="adam",
    loss=dict(
        age="mse",
        gender="binary_crossentropy",
        handedness="binary_crossentropy",
    ),
    loss_weights=dict(age=1.0, gender=0.5, handedness=0.5),
    metrics=dict(age="mae", gender="accuracy", handedness="accuracy"),
)

model.summary()


# Retraining the model after adding Dropout layers.
# Keeping the same training parameters to compare performance differences.


In [None]:
# Targets for each head, keyed by output layer name.
train_targets = {
    "age": y_age_train,
    "gender": y_gender_train,
    "handedness": y_handedness_train,
}
val_targets = {
    "age": y_age_test,
    "gender": y_gender_test,
    "handedness": y_handedness_test,
}

# Same schedule as the baseline run (50 epochs, batch size 32); the test
# split is passed as validation data.
history = model.fit(
    X_train,
    train_targets,
    validation_data=(X_test, val_targets),
    epochs=50,
    batch_size=32,
)


Epoch 1/50
55/55 ━━━━━━━━━━━━━━━━━━━━ 6s 24ms/step - age_loss: 785.9744 - age_mae: 24.9855 - gender_accuracy: 0.3293 - gender_loss: 1.3512 - handedness_accuracy: 0.3139 - handedness_loss: 0.8897 - loss: 787.0948 - val_age_loss: 287.0922 - val_age_mae: 11.5305 - val_gender_accuracy: 0.7364 - val_gender_loss: 0.6932 - val_handedness_accuracy: 0.8932 - val_handedness_loss: 0.3627 - val_loss: 288.1781
Epoch 2/50
55/55 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - age_loss: 266.4133 - age_mae: 12.0482 - gender_accuracy: 0.6683 - gender_loss: 0.7784 - handedness_accuracy: 0.8860 - handedness_loss: 0.4150 - loss: 267.0099 - val_age_loss: 223.3013 - val_age_mae: 10.0408 - val_gender_accuracy: 0.6909 - val_gender_loss: 0.6364 - val_handedness_accuracy: 0.8909 - val_handedness_loss: 0.3404 - val_loss: 224.0950
Epoch 3/50
55/55 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step - age_loss: 193.4554 - age_mae: 10.3855 - gende … (output truncated)

# Evaluating the model's performance after adding Dropout.
# Checking if regularization improves generalization on test data.


In [None]:
# Evaluate performance on the test set. `return_dict=True` gives metrics
# keyed by name, so the reported values do not depend on the positional order
# (or number) of entries that `evaluate` returns in a given Keras version.
eval_results = model.evaluate(
    X_test,
    {"age": y_age_test, "gender": y_gender_test, "handedness": y_handedness_test},
    return_dict=True,
)

# Total loss and the three headline metrics are required.
loss = eval_results["loss"]
age_mae = eval_results["age_mae"]
gender_acc = eval_results["gender_accuracy"]
handedness_acc = eval_results["handedness_accuracy"]

# Per-head losses are kept for callers that used the old tuple unpacking;
# they are None if this Keras version does not report them separately.
age_loss = eval_results.get("age_loss")
gender_loss = eval_results.get("gender_loss")
handedness_loss = eval_results.get("handedness_loss")

print(f"MAE pour l'âge : {age_mae}")
print(f"Précision pour le genre : {gender_acc}")
print(f"Précision pour handedness : {handedness_acc}")


14/14 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - age_loss: 86.0497 - age_mae: 6.7676 - gender_accuracy: 0.7340 - gender_loss: 0.2909 - handedness_accuracy: 0.8932 - handedness_loss: 0.1484 - loss: 86.5472
MAE pour l'âge : 6.617171764373779
Précision pour le genre : 0.7340909242630005
Précision pour handedness : 0.8931818008422852


# Comparing performance with a traditional machine learning baseline (Random Forest); the Decision Tree estimators are imported but not trained here.
# Separate models are trained for each task (age, gender, and handedness).
# Accuracy is measured for classification tasks, and Mean Absolute Error (MAE) for age prediction.


In [None]:
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_absolute_error

# Split features and all three targets in one call so they share the same
# row partition (same seed and test fraction as the neural-network cells).
(
    X_train,
    X_test,
    y_age_train,
    y_age_test,
    y_gender_train,
    y_gender_test,
    y_handedness_train,
    y_handedness_test,
) = train_test_split(
    X, y_age, y_gender, y_handedness, test_size=0.2, random_state=42
)

# Gender: shallow random forest classifier, scored by accuracy.
clf_gender = RandomForestClassifier(n_estimators=100, max_depth=4, random_state=42)
clf_gender.fit(X_train, y_gender_train)
y_gender_pred = clf_gender.predict(X_test)
accuracy_gender = accuracy_score(y_gender_test, y_gender_pred)
print(f"Accuracy for Gender (Random Forest): {accuracy_gender:.2f}")

# Handedness: same classifier configuration, scored by accuracy.
clf_handedness = RandomForestClassifier(n_estimators=100, max_depth=4, random_state=42)
clf_handedness.fit(X_train, y_handedness_train)
y_handedness_pred = clf_handedness.predict(X_test)
accuracy_handedness = accuracy_score(y_handedness_test, y_handedness_pred)
print(f"Accuracy for Handedness (Random Forest): {accuracy_handedness:.2f}")

# Age: random forest regressor, scored by mean absolute error.
reg_age = RandomForestRegressor(n_estimators=100, max_depth=5, random_state=42)
reg_age.fit(X_train, y_age_train)
y_age_pred = reg_age.predict(X_test)
mae_age = mean_absolute_error(y_age_test, y_age_pred)
print(f"Mean Absolute Error for Age (Random Forest): {mae_age:.2f}")


Accuracy for Gender (Decision Tree): 0.75
Accuracy for Handedness (Decision Tree): 0.91
Mean Absolute Error for Age (Decision Tree): 6.58
