In [1]:
# 📦 Required Imports
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from deap import base, creator, tools, algorithms
import pickle
import h5py
import os

# 📁 Load Data
csv_path = r"C:\Users\sagni\Downloads\KeyStrock Security\DSL-StrongPasswordData.csv"
data = pd.read_csv(csv_path)

# 🎯 Target Label Encoding
label_encoder = LabelEncoder()
data['subject'] = label_encoder.fit_transform(data['subject'])

# 🎯 y = target, X = features
y = data['subject']
X = data.drop(columns=['subject', 'sessionIndex', 'rep'])  # Remove metadata

# 📊 Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# 🧬 Evolutionary Algorithm Setup
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()

n_features = X_train.shape[1]

toolbox.register("attr_bool", lambda: random.randint(0, 1))
toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_bool, n=n_features)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# 📈 Fitness Function
def eval_individual(individual):
    selected_features = [i for i, bit in enumerate(individual) if bit == 1]
    if len(selected_features) < 5:
        return 0.,
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    scores = cross_val_score(clf, X_train.iloc[:, selected_features], y_train, cv=3)
    return scores.mean(),

toolbox.register("evaluate", eval_individual)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)

# 🚀 Run Evolution
pop = toolbox.population(n=30)
hof = tools.HallOfFame(1)
algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2, ngen=20, halloffame=hof, verbose=True)

# 🧠 Train Final Model with Best Features
best_features = [i for i, bit in enumerate(hof[0]) if bit == 1]
final_model = RandomForestClassifier(n_estimators=100, random_state=42)
final_model.fit(X_train.iloc[:, best_features], y_train)

# ✅ Evaluate Accuracy
y_pred = final_model.predict(X_test.iloc[:, best_features])
acc = accuracy_score(y_test, y_pred)
print(f"✅ Final Accuracy: {acc:.4f} using {len(best_features)} features")

# 💾 Save Model + Features to .h5
output_h5 = r"C:\Users\sagni\Downloads\KeyStrock Security\keystroke_rf_model.h5"
model_bytes = pickle.dumps(final_model)
selected_features_np = np.array(best_features)

with h5py.File(output_h5, 'w') as hf:
    hf.create_dataset("rf_model", data=np.void(model_bytes))
    hf.create_dataset("selected_features", data=selected_features_np)

print(f"🎉 Model saved to: {output_h5}")


gen	nevals
0  	30    
1  	14    
2  	13    
3  	19    
4  	22    
5  	21    
6  	21    
7  	21    
8  	19    
9  	24    
10 	14    
11 	21    
12 	18    
13 	18    
14 	17    
15 	12    
16 	22    
17 	18    
18 	13    
19 	19    
20 	20    
✅ Final Accuracy: 0.9299 using 27 features
🎉 Model saved to: C:\Users\sagni\Downloads\KeyStrock Security\keystroke_rf_model.h5
