# BioSec – Behavioral Biometric Authentication System

## GOAL: Allow access only to the owner, identified by typing behavior

### STEP 1: Import Required Libraries

In [None]:
#Data Handling
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Preprocessing & Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Save/Load model
import joblib
import pickle

# Colab file uploader
from google.colab import files

### STEP 2: Mount Google Drive and Load the Dataset

In [None]:
# Attach Google Drive to the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount(mountpoint='/content/drive')



Mounted at /content/drive


In [None]:
# Attach Google Drive to the Colab filesystem at /content/drive.
# NOTE(review): this cell repeats the mount from the previous cell, and no
# visible cell assigns `df` before the preprocessing step uses it -- the
# dataset-loading code appears to be missing here; confirm and restore it.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

### STEP 3: Preprocess the Data

In [None]:
# Preprocessing: clean the raw frame, derive the binary owner label, and
# build the scaled feature matrix.

# Drop timestamp if present
df.drop(columns=['timestamp'], inplace=True, errors='ignore')

# Remove incomplete rows *before* labeling so a missing username is dropped
# instead of raising inside the label computation.
df.dropna(inplace=True)

# Label encoding: Owner = 1, Others = 0.
# Both sides are lowercased: a lowercased username can never equal a
# mixed-case literal, which previously labeled every row as 0.
OWNER_USERNAME = 'Shreemathi'
df['label'] = (df['username'].str.lower() == OWNER_USERNAME.lower()).astype(int)

# Drop username column (to avoid data leakage)
df.drop(columns=['username'], inplace=True)

# NOTE(review): the notebook's df.head() output shows an `is_owner` column
# that stays in the feature matrix; if it encodes the same information as
# `label` it leaks the target -- confirm and drop it if so.

# Split features and target
X = df.drop('label', axis=1)
y = df['label']

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

### STEP 4: Split Data for Training and Testing

In [None]:
# Split the unscaled features first (80/20, stratified on the label, fixed
# seed), then fit the scaler on the training rows only. Fitting on the full
# dataset would let test-set statistics leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Rebind `scaler` so the object saved later is the one fitted on training data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


### STEP 5: Train the Machine Learning Model

In [None]:
# Random forest with 100 trees; the fixed seed keeps training reproducible.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)

### STEP 6: Evaluate the Model

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Predict on the held-out split; every metric below compares these
# predictions against y_test.
y_pred = model.predict(X_test)

# Accuracy
print(" Accuracy:", accuracy_score(y_test, y_pred))

# Use every class seen in either the true labels or the predictions, so a
# class that is predicted but absent from y_test still appears in the output.
unique_labels = sorted(set(y_test) | set(y_pred))
target_names = [str(label) for label in unique_labels]

# Confusion Matrix (rows/columns follow unique_labels)
print("\n Confusion Matrix:\n", confusion_matrix(
    y_test, y_pred, labels=unique_labels))

# Classification Report
print("\n Classification Report:\n", classification_report(
    y_test, y_pred,
    labels=unique_labels,
    target_names=target_names
))


 Accuracy: 1.0

 Confusion Matrix:
 [[11]]

 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11

    accuracy                           1.00        11
   macro avg       1.00      1.00      1.00        11
weighted avg       1.00      1.00      1.00        11





### STEP 7: Save the Trained Model & Scaler

In [None]:
# Persist the fitted model and scaler with joblib, then offer both files
# for download through the Colab file helper.
artifacts = {
    "biosec_model.pkl": model,
    "biosec_scaler.pkl": scaler,
}

# Write both files first ...
for filename, fitted_object in artifacts.items():
    joblib.dump(fitted_object, filename)

# ... then trigger the downloads in the same order.
for filename in artifacts:
    files.download(filename)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import pickle

# Serialize the model and the scaler with the standard pickle module.
# NOTE(review): these are the same file names the joblib cell writes, so
# running this cell replaces those files with plain-pickle output -- confirm
# which serializer the consumer of these files expects.
for target_path, artifact in (
    ('biosec_model.pkl', model),
    ('biosec_scaler.pkl', scaler),
):
    with open(target_path, 'wb') as fh:
        pickle.dump(artifact, fh)

In [None]:
# Preview the first five rows of the processed frame.
df.head(n=5)

Unnamed: 0,char_index,dwell_time,flight_time,is_owner,total_time_taken_sec,typing_speed_chars_per_sec,wpm,label
0,1,0.15,-0.149,False,2.03,14.3,88.77,0
1,2,0.15,1.875,False,2.03,14.3,88.77,0
2,1,0.15,-0.15,True,27.53,1.05,6.54,0
3,2,0.15,9.242,True,27.53,1.05,6.54,0
4,3,0.15,1.531,True,27.53,1.05,6.54,0
